CINXE.COM

GitHub - NVIDIA/cutlass: CUDA Templates for Linear Algebra Subroutines

<!DOCTYPE html> <html lang="en" data-color-mode="auto" data-light-theme="light" data-dark-theme="dark" data-a11y-animated-images="system" data-a11y-link-underlines="true" > <head> <meta charset="utf-8"> <link rel="dns-prefetch" href="https://github.githubassets.com"> <link rel="dns-prefetch" href="https://avatars.githubusercontent.com"> <link rel="dns-prefetch" href="https://github-cloud.s3.amazonaws.com"> <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/"> <link rel="preconnect" href="https://github.githubassets.com" crossorigin> <link rel="preconnect" href="https://avatars.githubusercontent.com"> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/light-7aa84bb7e11e.css" /><link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/dark-f65db3e8d171.css" /><link data-color-theme="dark_dimmed" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_dimmed-a8258e3c6dda.css" /><link data-color-theme="dark_high_contrast" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_high_contrast-7e97d834719c.css" /><link data-color-theme="dark_colorblind" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_colorblind-01d869f460be.css" /><link data-color-theme="light_colorblind" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_colorblind-534f3e971240.css" /><link data-color-theme="light_high_contrast" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_high_contrast-a8cc7d138001.css" /><link data-color-theme="light_tritanopia" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_tritanopia-35e9dfdc4f9f.css" /><link data-color-theme="dark_tritanopia" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_tritanopia-cf4cc5f62dfe.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-primitives-d9abecd14f1e.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-93aded0ee8a1.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/global-8bed0685a4b5.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/github-a954a02d9269.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/repository-4fce88777fa8.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/code-0210be90f4d3.css" /> <script type="application/json" id="client-env">{"locale":"en","featureFlags":["bypass_copilot_indexing_quota","copilot_immersive_file_preview","copilot_new_references_ui","copilot_bing_skill_ga","copilot_attach_folder_reference","copilot_personal_instructions","copilot_personal_instructions_templates","copilot_chat_repo_custom_instructions_preview","copilot_chat_retry_on_error","copilot_chat_persist_submitted_input","copilot_conversational_ux_history_refs","copilot_chat_shared_chat_input","copilot_chat_shared_topic_indicator","copilot_chat_shared_repo_sso_banner","copilot_editor_upsells","copilot_dotcom_chat_reduce_telemetry","copilot_implicit_context","copilot_no_floating_button","copilot_smell_icebreaker_ux","copilot_read_shared_conversation","dotcom_chat_client_side_skills","experimentation_azure_variant_endpoint","failbot_handle_non_errors","geojson_azure_maps","ghost_pilot_confidence_truncation_25","ghost_pilot_confidence_truncation_40","github_models_o3_mini_streaming","hovercard_accessibility","hovercard_close_on_escape","issues_react_remove_placeholders","issues_react_blur_item_picker_on_close","issues_react_include_bots_in_pickers","marketing_pages_search_explore_provider","remove_child_patch","sample_network_conn_type","swp_enterprise_contact_form","site_copilot_vscode_link_update","site_proxima_australia_update","issues_react_create_milestone","issues_react_cache_fix_workaround","lifecycle_label_name_updates"]}</script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/wp-runtime-ef129d6896bd.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_oddbird_popover-polyfill_dist_popover_js-9da652f58479.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_arianotify-polyfill_ariaNotify-polyfill_js-node_modules_github_mi-3abb8f-d7e6bc799724.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_failbot_failbot_ts-4600dbf2d60a.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/environment-f04cb2a9fc8c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_primer_behaviors_dist_esm_index_mjs-0dbb79f97f8f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_selector-observer_dist_index_esm_js-f690fd9ae3d5.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_relative-time-element_dist_index_js-f6da4b3fa34c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_auto-complete-element_dist_index_js-node_modules_github_catalyst_-8e9f78-a74b4e0a8a6b.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_text-expander-element_dist_index_js-78748950cb0c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_filter-input-element_dist_index_js-node_modules_github_remote-inp-b5f1d7-a1760ffda83d.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_markdown-toolbar-element_dist_index_js-ceef33f593fa.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_file-attachment-element_dist_index_js-node_modules_primer_view-co-c44a69-f0c8a795d1fd.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/github-elements-44d18ad044b3.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/element-registry-a8213dacfcb6.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_braintree_browser-detection_dist_browser-detection_js-node_modules_githu-2906d7-2a07a295af40.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_lit-html_lit-html_js-be8cb88f481b.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_morphdom_dist_morphdom-e-7c534c-a4a1922eb55f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_turbo_dist_turbo_es2017-esm_js-e3cbe28f1638.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-893f9f-6cf3320416b8.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_color-convert_index_js-e3180fe3bcb3.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_quote-selection_dist_index_js-node_modules_github_session-resume_-947061-205cd97df772.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_updatable-content_updatable-content_ts-a1563f62660e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_task-list_ts-app_assets_modules_github_sso_ts-ui_packages-900dde-f48a418a99d4.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_sticky-scroll-into-view_ts-8fa27fd7fbb6.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_ajax-error_ts-app_assets_modules_github_behaviors_include-87a4ae-e2caa5390f5a.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_commenting_edit_ts-app_assets_modules_github_behaviors_ht-83c235-783fc7e142e5.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/behaviors-854fa1987fb5.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_delegated-events_dist_index_js-node_modules_github_catalyst_lib_index_js-f6223d90c7ba.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/notifications-global-e12489347ccf.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_virtualized-list_es_index_js-node_modules_github_template-parts_lib_index_js-96453a51f920.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-70450e-eecf0d50276f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_ref-selector_ts-0a7bffd2f129.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/codespaces-fe2c516230f3.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_filter-input-element_dist_index_js-node_modules_github_remote-inp-3eebbd-0763620ad7bf.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_decorators_js-node_modules_delegated-events_di-e161aa-9d41fb1b6c9e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_file-attachment-element_dist_index_js-node_modules_github_remote--3c9c82-7238cfcdaa51.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/repositories-a4509a8583cd.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_github_catalyst_lib_inde-dbbea9-26cce2010167.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/code-menu-6a5f60eab447.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/primer-react-8e38c0ecf8b7.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/react-core-4128f7c95445.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/react-lib-f1bca44e0926.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/octicons-react-611691cca2f6.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_emotion_is-prop-valid_dist_emotion-is-prop-valid_esm_js-node_modules_emo-62da9f-2df2f32ec596.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_stacktrace-parser_dist_s-e7dcdd-f7cc96ebae76.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_oddbird_popover-polyfill_dist_popover-fn_js-55fea94174bf.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/notifications-subscriptions-menu-eff84ecbf2b6.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.5a0ffaf77c0db0d0dac2.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/notifications-subscriptions-menu.1bcff9205c241e99cff2.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.5a0ffaf77c0db0d0dac2.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/notifications-subscriptions-menu.1bcff9205c241e99cff2.module.css" /> <title>GitHub - NVIDIA/cutlass: CUDA Templates for Linear Algebra Subroutines</title> <meta name="route-pattern" content="/:user_id/:repository" data-turbo-transient> <meta name="route-controller" content="files" data-turbo-transient> <meta name="route-action" content="disambiguate" data-turbo-transient> <meta name="current-catalog-service-hash" content="f3abb0cc802f3d7b95fc8762b94bdcb13bf39634c40c357301c4aa1d67a256fb"> <meta name="request-id" content="A7DA:2A4203:A6B08:DE6DD:67B4F961" data-pjax-transient="true"/><meta name="html-safe-nonce" content="0d44e157586a95f8aea95263e86d4ce076b8ab3d40ec50ca8a4946f18d02a48b" data-pjax-transient="true"/><meta name="visitor-payload" content="eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiJBN0RBOjJBNDIwMzpBNkIwODpERTZERDo2N0I0Rjk2MSIsInZpc2l0b3JfaWQiOiIzODAzMTg0OTIyMzQ4NTUwNDk3IiwicmVnaW9uX2VkZ2UiOiJzb3V0aGVhc3Rhc2lhIiwicmVnaW9uX3JlbmRlciI6InNvdXRoZWFzdGFzaWEifQ==" data-pjax-transient="true"/><meta name="visitor-hmac" content="4e4600a04e818e28a06e233f6a8ffa9ac7d5ce4c383dfd0748f684022099b44c" data-pjax-transient="true"/> <meta name="hovercard-subject-tag" content="repository:112542515" data-turbo-transient> <meta name="github-keyboard-shortcuts" content="repository,copilot" data-turbo-transient="true" /> <meta name="selected-link" value="repo_source" data-turbo-transient> <link rel="assets" href="https://github.githubassets.com/"> <meta name="google-site-verification" content="Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I"> <meta name="octolytics-url" content="https://collector.github.com/github/collect" /> <meta name="analytics-location" content="/&lt;user-name&gt;/&lt;repo-name&gt;" data-turbo-transient="true" /> <meta name="user-login" content=""> <meta name="viewport" content="width=device-width"> <meta name="description" content="CUDA Templates for Linear Algebra Subroutines. Contribute to NVIDIA/cutlass development by creating an account on GitHub."> <link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="GitHub"> <link rel="fluid-icon" href="https://github.com/fluidicon.png" title="GitHub"> <meta property="fb:app_id" content="1401488693436528"> <meta name="apple-itunes-app" content="app-id=1477376905, app-argument=https://github.com/NVIDIA/cutlass" /> <meta name="twitter:image" content="https://opengraph.githubassets.com/8b614bd8cb52ca26f89649973f9c26d07b73dc49ab559ba786fd26f04a6b2caf/NVIDIA/cutlass" /><meta name="twitter:site" content="@github" /><meta name="twitter:card" content="summary_large_image" /><meta name="twitter:title" content="GitHub - NVIDIA/cutlass: CUDA Templates for Linear Algebra Subroutines" /><meta name="twitter:description" content="CUDA Templates for Linear Algebra Subroutines. Contribute to NVIDIA/cutlass development by creating an account on GitHub." /> <meta property="og:image" content="https://opengraph.githubassets.com/8b614bd8cb52ca26f89649973f9c26d07b73dc49ab559ba786fd26f04a6b2caf/NVIDIA/cutlass" /><meta property="og:image:alt" content="CUDA Templates for Linear Algebra Subroutines. Contribute to NVIDIA/cutlass development by creating an account on GitHub." /><meta property="og:image:width" content="1200" /><meta property="og:image:height" content="600" /><meta property="og:site_name" content="GitHub" /><meta property="og:type" content="object" /><meta property="og:title" content="GitHub - NVIDIA/cutlass: CUDA Templates for Linear Algebra Subroutines" /><meta property="og:url" content="https://github.com/NVIDIA/cutlass" /><meta property="og:description" content="CUDA Templates for Linear Algebra Subroutines. Contribute to NVIDIA/cutlass development by creating an account on GitHub." /> <meta name="hostname" content="github.com"> <meta name="expected-hostname" content="github.com"> <meta http-equiv="x-pjax-version" content="e2193f347328e9aa08c3798cfa510fcace47869b2c9bbc43c09acd530868f844" data-turbo-track="reload"> <meta http-equiv="x-pjax-csp-version" content="ace39c3b6632770952207593607e6e0be0db363435a8b877b1f96abe6430f345" data-turbo-track="reload"> <meta http-equiv="x-pjax-css-version" content="1c71206221e00a0a8e77d94d48d954f34ddbd711c4a0ced954fd49cd786cfa61" data-turbo-track="reload"> <meta http-equiv="x-pjax-js-version" content="f8ee7f53fea12b8bc17141312c7b2b11c8086512777e030408b2af015e5a0fc9" data-turbo-track="reload"> <meta name="turbo-cache-control" content="no-preview" data-turbo-transient=""> <meta data-hydrostats="publish"> <meta name="go-import" content="github.com/NVIDIA/cutlass git https://github.com/NVIDIA/cutlass.git"> <meta name="octolytics-dimension-user_id" content="1728152" /><meta name="octolytics-dimension-user_login" content="NVIDIA" /><meta name="octolytics-dimension-repository_id" content="112542515" /><meta name="octolytics-dimension-repository_nwo" content="NVIDIA/cutlass" /><meta name="octolytics-dimension-repository_public" content="true" /><meta name="octolytics-dimension-repository_is_fork" content="false" /><meta name="octolytics-dimension-repository_network_root_id" content="112542515" /><meta name="octolytics-dimension-repository_network_root_nwo" content="NVIDIA/cutlass" /> <link rel="canonical" href="https://github.com/NVIDIA/cutlass" data-turbo-transient> <meta name="turbo-body-classes" content="logged-out env-production page-responsive"> <meta name="browser-stats-url" content="https://api.github.com/_private/browser/stats"> <meta name="browser-errors-url" content="https://api.github.com/_private/browser/errors"> <link rel="mask-icon" href="https://github.githubassets.com/assets/pinned-octocat-093da3e6fa40.svg" color="#000000"> <link rel="alternate icon" class="js-site-favicon" type="image/png" href="https://github.githubassets.com/favicons/favicon.png"> <link rel="icon" class="js-site-favicon" type="image/svg+xml" href="https://github.githubassets.com/favicons/favicon.svg" data-base-href="https://github.githubassets.com/favicons/favicon"> <meta name="theme-color" content="#1e2327"> <meta name="color-scheme" content="light dark" /> <link rel="manifest" href="/manifest.json" crossOrigin="use-credentials"> </head> <body class="logged-out env-production page-responsive" style="word-wrap: break-word;"> <div data-turbo-body class="logged-out env-production page-responsive" style="word-wrap: break-word;"> <div class="position-relative header-wrapper js-header-wrapper "> <a href="#start-of-content" data-skip-target-assigned="false" class="px-2 py-4 color-bg-accent-emphasis color-fg-on-emphasis show-on-focus js-skip-to-content">Skip to content</a> <span data-view-component="true" class="progress-pjax-loader Progress position-fixed width-full"> <span style="width: 0%;" data-view-component="true" class="Progress-item progress-pjax-loader-bar left-0 top-0 color-bg-accent-emphasis"></span> </span> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_ui-commands_ui-commands_ts-e571874765ef.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/keyboard-shortcuts-dialog-765cf28766da.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.5a0ffaf77c0db0d0dac2.module.css" /> <react-partial partial-name="keyboard-shortcuts-dialog" data-ssr="false" data-attempted-ssr="false" > <script type="application/json" data-target="react-partial.embeddedData">{"props":{"docsUrl":"https://docs.github.com/get-started/accessibility/keyboard-shortcuts"}}</script> <div data-target="react-partial.reactRoot"></div> </react-partial> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-94fd67-73b675cf164a.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/sessions-2d195d11c56b.js"></script> <header class="HeaderMktg header-logged-out js-details-container js-header Details f4 py-3" role="banner" data-is-top="true" data-color-mode=light data-light-theme=light data-dark-theme=dark> <h2 class="sr-only">Navigation Menu</h2> <button type="button" class="HeaderMktg-backdrop d-lg-none border-0 position-fixed top-0 left-0 width-full height-full js-details-target" aria-label="Toggle navigation"> <span class="d-none">Toggle navigation</span> </button> <div class="d-flex flex-column flex-lg-row flex-items-center px-3 px-md-4 px-lg-5 height-full position-relative z-1"> <div class="d-flex flex-justify-between flex-items-center width-full width-lg-auto"> <div class="flex-1"> <button aria-label="Toggle navigation" aria-expanded="false" type="button" data-view-component="true" class="js-details-target js-nav-padding-recalculate js-header-menu-toggle Button--link Button--medium Button d-lg-none color-fg-inherit p-1"> <span class="Button-content"> <span class="Button-label"><div class="HeaderMenu-toggle-bar rounded my-1"></div> <div class="HeaderMenu-toggle-bar rounded my-1"></div> <div class="HeaderMenu-toggle-bar rounded my-1"></div></span> </span> </button> </div> <a class="mr-lg-3 color-fg-inherit flex-order-2 js-prevent-focus-on-mobile-nav" href="/" aria-label="Homepage" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to go to homepage&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Logomark;ref_loc:Header&quot;}"> <svg height="32" aria-hidden="true" viewBox="0 0 24 24" version="1.1" width="32" data-view-component="true" class="octicon octicon-mark-github"> <path d="M12.5.75C6.146.75 1 5.896 1 12.25c0 5.089 3.292 9.387 7.863 10.91.575.101.79-.244.79-.546 0-.273-.014-1.178-.014-2.142-2.889.532-3.636-.704-3.866-1.35-.13-.331-.69-1.352-1.18-1.625-.402-.216-.977-.748-.014-.762.906-.014 1.553.834 1.769 1.179 1.035 1.74 2.688 1.25 3.349.948.1-.747.402-1.25.733-1.538-2.559-.287-5.232-1.279-5.232-5.678 0-1.25.445-2.285 1.178-3.09-.115-.288-.517-1.467.115-3.048 0 0 .963-.302 3.163 1.179.92-.259 1.897-.388 2.875-.388.977 0 1.955.13 2.875.388 2.2-1.495 3.162-1.179 3.162-1.179.633 1.581.23 2.76.115 3.048.733.805 1.179 1.825 1.179 3.09 0 4.413-2.688 5.39-5.247 5.678.417.36.776 1.05.776 2.128 0 1.538-.014 2.774-.014 3.162 0 .302.216.662.79.547C20.709 21.637 24 17.324 24 12.25 24 5.896 18.854.75 12.5.75Z"></path> </svg> </a> <div class="flex-1 flex-order-2 text-right"> <a href="/login?return_to=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass" class="HeaderMenu-link HeaderMenu-button d-inline-flex d-lg-none flex-order-1 f5 no-underline border color-border-default rounded-2 px-2 py-1 color-fg-inherit js-prevent-focus-on-mobile-nav" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="d813b087105eb5c9241e62c1dc7cbf47960232874207f670836baca8e119fb52" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to Sign in&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Sign in;ref_loc:Header&quot;}" > Sign in </a> </div> </div> <div class="HeaderMenu js-header-menu height-fit position-lg-relative d-lg-flex flex-column flex-auto top-0"> <div class="HeaderMenu-wrapper d-flex flex-column flex-self-start flex-lg-row flex-auto rounded rounded-lg-0"> <nav class="HeaderMenu-nav" aria-label="Global"> <ul class="d-lg-flex list-style-none"> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Product <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 d-lg-flex flex-wrap dropdown-menu-wide"> <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_copilot&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_copilot_link_product_navbar&quot;}" href="https://github.com/features/copilot"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-copilot color-fg-subtle mr-3"> <path d="M23.922 16.992c-.861 1.495-5.859 5.023-11.922 5.023-6.063 0-11.061-3.528-11.922-5.023A.641.641 0 0 1 0 16.736v-2.869a.841.841 0 0 1 .053-.22c.372-.935 1.347-2.292 2.605-2.656.167-.429.414-1.055.644-1.517a10.195 10.195 0 0 1-.052-1.086c0-1.331.282-2.499 1.132-3.368.397-.406.89-.717 1.474-.952 1.399-1.136 3.392-2.093 6.122-2.093 2.731 0 4.767.957 6.166 2.093.584.235 1.077.546 1.474.952.85.869 1.132 2.037 1.132 3.368 0 .368-.014.733-.052 1.086.23.462.477 1.088.644 1.517 1.258.364 2.233 1.721 2.605 2.656a.832.832 0 0 1 .053.22v2.869a.641.641 0 0 1-.078.256ZM12.172 11h-.344a4.323 4.323 0 0 1-.355.508C10.703 12.455 9.555 13 7.965 13c-1.725 0-2.989-.359-3.782-1.259a2.005 2.005 0 0 1-.085-.104L4 11.741v6.585c1.435.779 4.514 2.179 8 2.179 3.486 0 6.565-1.4 8-2.179v-6.585l-.098-.104s-.033.045-.085.104c-.793.9-2.057 1.259-3.782 1.259-1.59 0-2.738-.545-3.508-1.492a4.323 4.323 0 0 1-.355-.508h-.016.016Zm.641-2.935c.136 1.057.403 1.913.878 2.497.442.544 1.134.938 2.344.938 1.573 0 2.292-.337 2.657-.751.384-.435.558-1.15.558-2.361 0-1.14-.243-1.847-.705-2.319-.477-.488-1.319-.862-2.824-1.025-1.487-.161-2.192.138-2.533.529-.269.307-.437.808-.438 1.578v.021c0 .265.021.562.063.893Zm-1.626 0c.042-.331.063-.628.063-.894v-.02c-.001-.77-.169-1.271-.438-1.578-.341-.391-1.046-.69-2.533-.529-1.505.163-2.347.537-2.824 1.025-.462.472-.705 1.179-.705 2.319 0 1.211.175 1.926.558 2.361.365.414 1.084.751 2.657.751 1.21 0 1.902-.394 2.344-.938.475-.584.742-1.44.878-2.497Z"></path><path d="M14.5 14.25a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Zm-5 0a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Z"></path> </svg> <div> <div class="color-fg-default h4">GitHub Copilot</div> Write better code with AI </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;security&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;security_link_product_navbar&quot;}" href="https://github.com/features/security"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-shield-check color-fg-subtle mr-3"> <path d="M16.53 9.78a.75.75 0 0 0-1.06-1.06L11 13.19l-1.97-1.97a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l5-5Z"></path><path d="m12.54.637 8.25 2.675A1.75 1.75 0 0 1 22 4.976V10c0 6.19-3.771 10.704-9.401 12.83a1.704 1.704 0 0 1-1.198 0C5.77 20.705 2 16.19 2 10V4.976c0-.758.489-1.43 1.21-1.664L11.46.637a1.748 1.748 0 0 1 1.08 0Zm-.617 1.426-8.25 2.676a.249.249 0 0 0-.173.237V10c0 5.46 3.28 9.483 8.43 11.426a.199.199 0 0 0 .14 0C17.22 19.483 20.5 15.461 20.5 10V4.976a.25.25 0 0 0-.173-.237l-8.25-2.676a.253.253 0 0 0-.154 0Z"></path> </svg> <div> <div class="color-fg-default h4">Security</div> Find and fix vulnerabilities </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;actions&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;actions_link_product_navbar&quot;}" href="https://github.com/features/actions"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-workflow color-fg-subtle mr-3"> <path d="M1 3a2 2 0 0 1 2-2h6.5a2 2 0 0 1 2 2v6.5a2 2 0 0 1-2 2H7v4.063C7 16.355 7.644 17 8.438 17H12.5v-2.5a2 2 0 0 1 2-2H21a2 2 0 0 1 2 2V21a2 2 0 0 1-2 2h-6.5a2 2 0 0 1-2-2v-2.5H8.437A2.939 2.939 0 0 1 5.5 15.562V11.5H3a2 2 0 0 1-2-2Zm2-.5a.5.5 0 0 0-.5.5v6.5a.5.5 0 0 0 .5.5h6.5a.5.5 0 0 0 .5-.5V3a.5.5 0 0 0-.5-.5ZM14.5 14a.5.5 0 0 0-.5.5V21a.5.5 0 0 0 .5.5H21a.5.5 0 0 0 .5-.5v-6.5a.5.5 0 0 0-.5-.5Z"></path> </svg> <div> <div class="color-fg-default h4">Actions</div> Automate any workflow </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;codespaces&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;codespaces_link_product_navbar&quot;}" href="https://github.com/features/codespaces"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-codespaces color-fg-subtle mr-3"> <path d="M3.5 3.75C3.5 2.784 4.284 2 5.25 2h13.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 18.75 13H5.25a1.75 1.75 0 0 1-1.75-1.75Zm-2 12c0-.966.784-1.75 1.75-1.75h17.5c.966 0 1.75.784 1.75 1.75v4a1.75 1.75 0 0 1-1.75 1.75H3.25a1.75 1.75 0 0 1-1.75-1.75ZM5.25 3.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h13.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Zm-2 12a.25.25 0 0 0-.25.25v4c0 .138.112.25.25.25h17.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25Z"></path><path d="M10 17.75a.75.75 0 0 1 .75-.75h6.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1-.75-.75Zm-4 0a.75.75 0 0 1 .75-.75h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1-.75-.75Z"></path> </svg> <div> <div class="color-fg-default h4">Codespaces</div> Instant dev environments </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;issues&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;issues_link_product_navbar&quot;}" href="https://github.com/features/issues"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-issue-opened color-fg-subtle mr-3"> <path d="M12 1c6.075 0 11 4.925 11 11s-4.925 11-11 11S1 18.075 1 12 5.925 1 12 1ZM2.5 12a9.5 9.5 0 0 0 9.5 9.5 9.5 9.5 0 0 0 9.5-9.5A9.5 9.5 0 0 0 12 2.5 9.5 9.5 0 0 0 2.5 12Zm9.5 2a2 2 0 1 1-.001-3.999A2 2 0 0 1 12 14Z"></path> </svg> <div> <div class="color-fg-default h4">Issues</div> Plan and track work </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;code_review&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;code_review_link_product_navbar&quot;}" href="https://github.com/features/code-review"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-code-review color-fg-subtle mr-3"> <path d="M10.3 6.74a.75.75 0 0 1-.04 1.06l-2.908 2.7 2.908 2.7a.75.75 0 1 1-1.02 1.1l-3.5-3.25a.75.75 0 0 1 0-1.1l3.5-3.25a.75.75 0 0 1 1.06.04Zm3.44 1.06a.75.75 0 1 1 1.02-1.1l3.5 3.25a.75.75 0 0 1 0 1.1l-3.5 3.25a.75.75 0 1 1-1.02-1.1l2.908-2.7-2.908-2.7Z"></path><path d="M1.5 4.25c0-.966.784-1.75 1.75-1.75h17.5c.966 0 1.75.784 1.75 1.75v12.5a1.75 1.75 0 0 1-1.75 1.75h-9.69l-3.573 3.573A1.458 1.458 0 0 1 5 21.043V18.5H3.25a1.75 1.75 0 0 1-1.75-1.75ZM3.25 4a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h2.5a.75.75 0 0 1 .75.75v3.19l3.72-3.72a.749.749 0 0 1 .53-.22h10a.25.25 0 0 0 .25-.25V4.25a.25.25 0 0 0-.25-.25Z"></path> </svg> <div> <div class="color-fg-default h4">Code Review</div> Manage code changes </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;discussions&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;discussions_link_product_navbar&quot;}" href="https://github.com/features/discussions"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-comment-discussion color-fg-subtle mr-3"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"></path><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z"></path> </svg> <div> <div class="color-fg-default h4">Discussions</div> Collaborate outside of code </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;code_search&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;code_search_link_product_navbar&quot;}" href="https://github.com/features/code-search"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-code-square color-fg-subtle mr-3"> <path d="M10.3 8.24a.75.75 0 0 1-.04 1.06L7.352 12l2.908 2.7a.75.75 0 1 1-1.02 1.1l-3.5-3.25a.75.75 0 0 1 0-1.1l3.5-3.25a.75.75 0 0 1 1.06.04Zm3.44 1.06a.75.75 0 1 1 1.02-1.1l3.5 3.25a.75.75 0 0 1 0 1.1l-3.5 3.25a.75.75 0 1 1-1.02-1.1l2.908-2.7-2.908-2.7Z"></path><path d="M2 3.75C2 2.784 2.784 2 3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25Zm1.75-.25a.25.25 0 0 0-.25.25v16.5c0 .138.112.25.25.25h16.5a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25Z"></path> </svg> <div> <div class="color-fg-default h4">Code Search</div> Find more, search less </div> </a></li> </ul> </div> </div> <div class="HeaderMenu-column px-lg-4"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 border-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="product-explore-heading">Explore</span> <ul class="list-style-none f5" aria-labelledby="product-explore-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;all_features&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;all_features_link_product_navbar&quot;}" href="https://github.com/features"> All features </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;documentation&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;documentation_link_product_navbar&quot;}" href="https://docs.github.com"> Documentation <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_skills&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_skills_link_product_navbar&quot;}" href="https://skills.github.com"> GitHub Skills <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;blog&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;blog_link_product_navbar&quot;}" href="https://github.blog"> Blog <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Solutions <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 d-lg-flex flex-wrap dropdown-menu-wide"> <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 pb-lg-3 mb-3 mb-lg-0"> <span class="d-block h4 color-fg-default my-1" id="solutions-by-company-size-heading">By company size</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-company-size-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;enterprises&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;enterprises_link_solutions_navbar&quot;}" href="https://github.com/enterprise"> Enterprises </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;small_and_medium_teams&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;small_and_medium_teams_link_solutions_navbar&quot;}" href="https://github.com/team"> Small and medium teams </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;startups&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;startups_link_solutions_navbar&quot;}" href="https://github.com/enterprise/startups"> Startups </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;nonprofits&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;nonprofits_link_solutions_navbar&quot;}" href="/solutions/industry/nonprofits"> Nonprofits </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="solutions-by-use-case-heading">By use case</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-use-case-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devsecops&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devsecops_link_solutions_navbar&quot;}" href="/solutions/use-case/devsecops"> DevSecOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devops&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devops_link_solutions_navbar&quot;}" href="/solutions/use-case/devops"> DevOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;ci_cd&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;ci_cd_link_solutions_navbar&quot;}" href="/solutions/use-case/ci-cd"> CI/CD </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all_use_cases&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_use_cases_link_solutions_navbar&quot;}" href="/solutions/use-case"> View all use cases </a></li> </ul> </div> </div> <div class="HeaderMenu-column px-lg-4"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="solutions-by-industry-heading">By industry</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-industry-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;healthcare&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;healthcare_link_solutions_navbar&quot;}" href="/solutions/industry/healthcare"> Healthcare </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;financial_services&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;financial_services_link_solutions_navbar&quot;}" href="/solutions/industry/financial-services"> Financial services </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;manufacturing&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;manufacturing_link_solutions_navbar&quot;}" href="/solutions/industry/manufacturing"> Manufacturing </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;government&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;government_link_solutions_navbar&quot;}" href="/solutions/industry/government"> Government </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all_industries&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_industries_link_solutions_navbar&quot;}" href="/solutions/industry"> View all industries </a></li> </ul> </div> </div> <div class="HeaderMenu-trailing-link rounded-bottom-2 flex-shrink-0 mt-lg-4 px-lg-4 py-4 py-lg-3 f5 text-semibold"> <a href="/solutions"> View all solutions <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-right HeaderMenu-trailing-link-icon"> <path d="M6.22 3.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L9.94 8 6.22 4.28a.75.75 0 0 1 0-1.06Z"></path> </svg> </a> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Resources <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 d-lg-flex flex-wrap dropdown-menu-wide"> <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="resources-topics-heading">Topics</span> <ul class="list-style-none f5" aria-labelledby="resources-topics-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;ai&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;ai_link_resources_navbar&quot;}" href="/resources/articles/ai"> AI </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devops&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devops_link_resources_navbar&quot;}" href="/resources/articles/devops"> DevOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;security&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;security_link_resources_navbar&quot;}" href="/resources/articles/security"> Security </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;software_development&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;software_development_link_resources_navbar&quot;}" href="/resources/articles/software-development"> Software Development </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_link_resources_navbar&quot;}" href="/resources/articles"> View all </a></li> </ul> </div> </div> <div class="HeaderMenu-column px-lg-4"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 border-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="resources-explore-heading">Explore</span> <ul class="list-style-none f5" aria-labelledby="resources-explore-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;learning_pathways&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;learning_pathways_link_resources_navbar&quot;}" href="https://resources.github.com/learn/pathways"> Learning Pathways <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;white_papers_ebooks_webinars&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;white_papers_ebooks_webinars_link_resources_navbar&quot;}" href="https://resources.github.com"> White papers, Ebooks, Webinars <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;customer_stories&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;customer_stories_link_resources_navbar&quot;}" href="https://github.com/customer-stories"> Customer Stories </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;partners&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;partners_link_resources_navbar&quot;}" href="https://partner.github.com"> Partners <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;executive_insights&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;executive_insights_link_resources_navbar&quot;}" href="https://github.com/solutions/executive-insights"> Executive Insights </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Open Source <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 px-lg-4"> <div class="HeaderMenu-column"> <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_sponsors&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_sponsors_link_open_source_navbar&quot;}" href="/sponsors"> <div> <div class="color-fg-default h4">GitHub Sponsors</div> Fund open source developers </div> </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;the_readme_project&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;the_readme_project_link_open_source_navbar&quot;}" href="https://github.com/readme"> <div> <div class="color-fg-default h4">The ReadME Project</div> GitHub community articles </div> </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 border-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="open-source-repositories-heading">Repositories</span> <ul class="list-style-none f5" aria-labelledby="open-source-repositories-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;topics&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;topics_link_open_source_navbar&quot;}" href="https://github.com/topics"> Topics </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;trending&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;trending_link_open_source_navbar&quot;}" href="https://github.com/trending"> Trending </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;collections&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;collections_link_open_source_navbar&quot;}" href="https://github.com/collections"> Collections </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Enterprise <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 px-lg-4"> <div class="HeaderMenu-column"> <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;enterprise_platform&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;enterprise_platform_link_enterprise_navbar&quot;}" href="/enterprise"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-stack color-fg-subtle mr-3"> <path d="M11.063 1.456a1.749 1.749 0 0 1 1.874 0l8.383 5.316a1.751 1.751 0 0 1 0 2.956l-8.383 5.316a1.749 1.749 0 0 1-1.874 0L2.68 9.728a1.751 1.751 0 0 1 0-2.956Zm1.071 1.267a.25.25 0 0 0-.268 0L3.483 8.039a.25.25 0 0 0 0 .422l8.383 5.316a.25.25 0 0 0 .268 0l8.383-5.316a.25.25 0 0 0 0-.422Z"></path><path d="M1.867 12.324a.75.75 0 0 1 1.035-.232l8.964 5.685a.25.25 0 0 0 .268 0l8.964-5.685a.75.75 0 0 1 .804 1.267l-8.965 5.685a1.749 1.749 0 0 1-1.874 0l-8.965-5.685a.75.75 0 0 1-.231-1.035Z"></path><path d="M1.867 16.324a.75.75 0 0 1 1.035-.232l8.964 5.685a.25.25 0 0 0 .268 0l8.964-5.685a.75.75 0 0 1 .804 1.267l-8.965 5.685a1.749 1.749 0 0 1-1.874 0l-8.965-5.685a.75.75 0 0 1-.231-1.035Z"></path> </svg> <div> <div class="color-fg-default h4">Enterprise platform</div> AI-powered developer platform </div> </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 border-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="enterprise-available-add-ons-heading">Available add-ons</span> <ul class="list-style-none f5" aria-labelledby="enterprise-available-add-ons-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;advanced_security&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;advanced_security_link_enterprise_navbar&quot;}" href="https://github.com/enterprise/advanced-security"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-shield-check color-fg-subtle mr-3"> <path d="M16.53 9.78a.75.75 0 0 0-1.06-1.06L11 13.19l-1.97-1.97a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l5-5Z"></path><path d="m12.54.637 8.25 2.675A1.75 1.75 0 0 1 22 4.976V10c0 6.19-3.771 10.704-9.401 12.83a1.704 1.704 0 0 1-1.198 0C5.77 20.705 2 16.19 2 10V4.976c0-.758.489-1.43 1.21-1.664L11.46.637a1.748 1.748 0 0 1 1.08 0Zm-.617 1.426-8.25 2.676a.249.249 0 0 0-.173.237V10c0 5.46 3.28 9.483 8.43 11.426a.199.199 0 0 0 .14 0C17.22 19.483 20.5 15.461 20.5 10V4.976a.25.25 0 0 0-.173-.237l-8.25-2.676a.253.253 0 0 0-.154 0Z"></path> </svg> <div> <div class="color-fg-default h4">Advanced Security</div> Enterprise-grade security features </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_copilot&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_copilot_link_enterprise_navbar&quot;}" href="/features/copilot#enterprise"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-copilot color-fg-subtle mr-3"> <path d="M23.922 16.992c-.861 1.495-5.859 5.023-11.922 5.023-6.063 0-11.061-3.528-11.922-5.023A.641.641 0 0 1 0 16.736v-2.869a.841.841 0 0 1 .053-.22c.372-.935 1.347-2.292 2.605-2.656.167-.429.414-1.055.644-1.517a10.195 10.195 0 0 1-.052-1.086c0-1.331.282-2.499 1.132-3.368.397-.406.89-.717 1.474-.952 1.399-1.136 3.392-2.093 6.122-2.093 2.731 0 4.767.957 6.166 2.093.584.235 1.077.546 1.474.952.85.869 1.132 2.037 1.132 3.368 0 .368-.014.733-.052 1.086.23.462.477 1.088.644 1.517 1.258.364 2.233 1.721 2.605 2.656a.832.832 0 0 1 .053.22v2.869a.641.641 0 0 1-.078.256ZM12.172 11h-.344a4.323 4.323 0 0 1-.355.508C10.703 12.455 9.555 13 7.965 13c-1.725 0-2.989-.359-3.782-1.259a2.005 2.005 0 0 1-.085-.104L4 11.741v6.585c1.435.779 4.514 2.179 8 2.179 3.486 0 6.565-1.4 8-2.179v-6.585l-.098-.104s-.033.045-.085.104c-.793.9-2.057 1.259-3.782 1.259-1.59 0-2.738-.545-3.508-1.492a4.323 4.323 0 0 1-.355-.508h-.016.016Zm.641-2.935c.136 1.057.403 1.913.878 2.497.442.544 1.134.938 2.344.938 1.573 0 2.292-.337 2.657-.751.384-.435.558-1.15.558-2.361 0-1.14-.243-1.847-.705-2.319-.477-.488-1.319-.862-2.824-1.025-1.487-.161-2.192.138-2.533.529-.269.307-.437.808-.438 1.578v.021c0 .265.021.562.063.893Zm-1.626 0c.042-.331.063-.628.063-.894v-.02c-.001-.77-.169-1.271-.438-1.578-.341-.391-1.046-.69-2.533-.529-1.505.163-2.347.537-2.824 1.025-.462.472-.705 1.179-.705 2.319 0 1.211.175 1.926.558 2.361.365.414 1.084.751 2.657.751 1.21 0 1.902-.394 2.344-.938.475-.584.742-1.44.878-2.497Z"></path><path d="M14.5 14.25a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Zm-5 0a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Z"></path> </svg> <div> <div class="color-fg-default h4">GitHub Copilot</div> Enterprise-grade AI features </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;premium_support&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;premium_support_link_enterprise_navbar&quot;}" href="/premium-support"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-comment-discussion color-fg-subtle mr-3"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"></path><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z"></path> </svg> <div> <div class="color-fg-default h4">Premium Support</div> Enterprise-grade 24/7 support </div> </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <a class="HeaderMenu-link no-underline px-0 px-lg-2 py-3 py-lg-2 d-block d-lg-inline-block" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;pricing&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;pricing_link_global_navbar&quot;}" href="https://github.com/pricing">Pricing</a> </li> </ul> </nav> <div class="d-flex flex-column flex-lg-row width-full flex-justify-end flex-lg-items-center text-center mt-3 mt-lg-0 text-lg-left ml-lg-3"> <qbsearch-input class="search-input" data-scope="repo:NVIDIA/cutlass" data-custom-scopes-path="/search/custom_scopes" data-delete-custom-scopes-csrf="7CDtmS5fTIpIGVYAgENdHrduggfIUoeu-wn_nGRZs4EbrK1CntvsQ03WwkI1fyJNk3Peq7uQcn_HRhKDN33P1g" data-max-custom-scopes="10" data-header-redesign-enabled="false" data-initial-value="" data-blackbird-suggestions-path="/search/suggestions" data-jump-to-suggestions-path="/_graphql/GetSuggestedNavigationDestinations" data-current-repository="NVIDIA/cutlass" data-current-org="NVIDIA" data-current-owner="" data-logged-in="false" data-copilot-chat-enabled="false" data-nl-search-enabled="false" data-retain-scroll-position="true"> <div class="search-input-container search-with-dialog position-relative d-flex flex-row flex-items-center mr-4 rounded" data-action="click:qbsearch-input#searchInputContainerClicked" > <button type="button" class="header-search-button placeholder input-button form-control d-flex flex-1 flex-self-stretch flex-items-center no-wrap width-full py-0 pl-2 pr-0 text-left border-0 box-shadow-none" data-target="qbsearch-input.inputButton" aria-label="Search or jump to…" aria-haspopup="dialog" placeholder="Search or jump to..." data-hotkey=s,/ autocapitalize="off" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;searchbar&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;input&quot;,&quot;label&quot;:&quot;searchbar_input_global_navbar&quot;}" data-action="click:qbsearch-input#handleExpand" > <div class="mr-2 color-fg-muted"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </div> <span class="flex-1" data-target="qbsearch-input.inputButtonText">Search or jump to...</span> <div class="d-flex" data-target="qbsearch-input.hotkeyIndicator"> <svg xmlns="http://www.w3.org/2000/svg" width="22" height="20" aria-hidden="true" class="mr-1"><path fill="none" stroke="#979A9C" opacity=".4" d="M3.5.5h12c1.7 0 3 1.3 3 3v13c0 1.7-1.3 3-3 3h-12c-1.7 0-3-1.3-3-3v-13c0-1.7 1.3-3 3-3z"></path><path fill="#979A9C" d="M11.8 6L8 15.1h-.9L10.8 6h1z"></path></svg> </div> </button> <input type="hidden" name="type" class="js-site-search-type-field"> <div class="Overlay--hidden " data-modal-dialog-overlay> <modal-dialog data-action="close:qbsearch-input#handleClose cancel:qbsearch-input#handleClose" data-target="qbsearch-input.searchSuggestionsDialog" role="dialog" id="search-suggestions-dialog" aria-modal="true" aria-labelledby="search-suggestions-dialog-header" data-view-component="true" class="Overlay Overlay--width-large Overlay--height-auto"> <h1 id="search-suggestions-dialog-header" class="sr-only">Search code, repositories, users, issues, pull requests...</h1> <div class="Overlay-body Overlay-body--paddingNone"> <div data-view-component="true"> <div class="search-suggestions position-fixed width-full color-shadow-large border color-fg-default color-bg-default overflow-hidden d-flex flex-column query-builder-container" style="border-radius: 12px;" data-target="qbsearch-input.queryBuilderContainer" hidden > <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="query-builder-test-form" action="" accept-charset="UTF-8" method="get"> <query-builder data-target="qbsearch-input.queryBuilder" id="query-builder-query-builder-test" data-filter-key=":" data-view-component="true" class="QueryBuilder search-query-builder"> <div class="FormControl FormControl--fullWidth"> <label id="query-builder-test-label" for="query-builder-test" class="FormControl-label sr-only"> Search </label> <div class="QueryBuilder-StyledInput width-fit " data-target="query-builder.styledInput" > <span id="query-builder-test-leadingvisual-wrap" class="FormControl-input-leadingVisualWrap QueryBuilder-leadingVisualWrap"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search FormControl-input-leadingVisual"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </span> <div data-target="query-builder.styledInputContainer" class="QueryBuilder-StyledInputContainer"> <div aria-hidden="true" class="QueryBuilder-StyledInputContent" data-target="query-builder.styledInputContent" ></div> <div class="QueryBuilder-InputWrapper"> <div aria-hidden="true" class="QueryBuilder-Sizer" data-target="query-builder.sizer"></div> <input id="query-builder-test" name="query-builder-test" value="" autocomplete="off" type="text" role="combobox" spellcheck="false" aria-expanded="false" aria-describedby="validation-d7785cb5-7f88-432d-a1bf-d533dacddf8b" data-target="query-builder.input" data-action=" input:query-builder#inputChange blur:query-builder#inputBlur keydown:query-builder#inputKeydown focus:query-builder#inputFocus " data-view-component="true" class="FormControl-input QueryBuilder-Input FormControl-medium" /> </div> </div> <span class="sr-only" id="query-builder-test-clear">Clear</span> <button role="button" id="query-builder-test-clear-button" aria-labelledby="query-builder-test-clear query-builder-test-label" data-target="query-builder.clearButton" data-action=" click:query-builder#clear focus:query-builder#clearButtonFocus blur:query-builder#clearButtonBlur " variant="small" hidden="hidden" type="button" data-view-component="true" class="Button Button--iconOnly Button--invisible Button--medium mr-1 px-2 py-0 d-flex flex-items-center rounded-1 color-fg-muted"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x-circle-fill Button-visual"> <path d="M2.343 13.657A8 8 0 1 1 13.658 2.343 8 8 0 0 1 2.343 13.657ZM6.03 4.97a.751.751 0 0 0-1.042.018.751.751 0 0 0-.018 1.042L6.94 8 4.97 9.97a.749.749 0 0 0 .326 1.275.749.749 0 0 0 .734-.215L8 9.06l1.97 1.97a.749.749 0 0 0 1.275-.326.749.749 0 0 0-.215-.734L9.06 8l1.97-1.97a.749.749 0 0 0-.326-1.275.749.749 0 0 0-.734.215L8 6.94Z"></path> </svg> </button> </div> <template id="search-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </template> <template id="code-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="file-code-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-file-code"> <path d="M4 1.75C4 .784 4.784 0 5.75 0h5.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v8.586A1.75 1.75 0 0 1 14.25 15h-9a.75.75 0 0 1 0-1.5h9a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 10 4.25V1.5H5.75a.25.25 0 0 0-.25.25v2.5a.75.75 0 0 1-1.5 0Zm1.72 4.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734l1.47-1.47-1.47-1.47a.75.75 0 0 1 0-1.06ZM3.28 7.78 1.81 9.25l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Zm8.22-6.218V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path> </svg> </template> <template id="history-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-history"> <path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path> </svg> </template> <template id="repo-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo"> <path d="M2 2.5A2.5 2.5 0 0 1 4.5 0h8.75a.75.75 0 0 1 .75.75v12.5a.75.75 0 0 1-.75.75h-2.5a.75.75 0 0 1 0-1.5h1.75v-2h-8a1 1 0 0 0-.714 1.7.75.75 0 1 1-1.072 1.05A2.495 2.495 0 0 1 2 11.5Zm10.5-1h-8a1 1 0 0 0-1 1v6.708A2.486 2.486 0 0 1 4.5 9h8ZM5 12.25a.25.25 0 0 1 .25-.25h3.5a.25.25 0 0 1 .25.25v3.25a.25.25 0 0 1-.4.2l-1.45-1.087a.249.249 0 0 0-.3 0L5.4 15.7a.25.25 0 0 1-.4-.2Z"></path> </svg> </template> <template id="bookmark-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bookmark"> <path d="M3 2.75C3 1.784 3.784 1 4.75 1h6.5c.966 0 1.75.784 1.75 1.75v11.5a.75.75 0 0 1-1.227.579L8 11.722l-3.773 3.107A.751.751 0 0 1 3 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v9.91l3.023-2.489a.75.75 0 0 1 .954 0l3.023 2.49V2.75a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="plus-circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-plus-circle"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm7.25-3.25v2.5h2.5a.75.75 0 0 1 0 1.5h-2.5v2.5a.75.75 0 0 1-1.5 0v-2.5h-2.5a.75.75 0 0 1 0-1.5h2.5v-2.5a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> </template> <template id="trash-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-trash"> <path d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"></path> </svg> </template> <template id="team-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-people"> <path d="M2 5.5a3.5 3.5 0 1 1 5.898 2.549 5.508 5.508 0 0 1 3.034 4.084.75.75 0 1 1-1.482.235 4 4 0 0 0-7.9 0 .75.75 0 0 1-1.482-.236A5.507 5.507 0 0 1 3.102 8.05 3.493 3.493 0 0 1 2 5.5ZM11 4a3.001 3.001 0 0 1 2.22 5.018 5.01 5.01 0 0 1 2.56 3.012.749.749 0 0 1-.885.954.752.752 0 0 1-.549-.514 3.507 3.507 0 0 0-2.522-2.372.75.75 0 0 1-.574-.73v-.352a.75.75 0 0 1 .416-.672A1.5 1.5 0 0 0 11 5.5.75.75 0 0 1 11 4Zm-5.5-.5a2 2 0 1 0-.001 3.999A2 2 0 0 0 5.5 3.5Z"></path> </svg> </template> <template id="project-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-project"> <path d="M1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0ZM1.5 1.75v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25ZM11.75 3a.75.75 0 0 1 .75.75v7.5a.75.75 0 0 1-1.5 0v-7.5a.75.75 0 0 1 .75-.75Zm-8.25.75a.75.75 0 0 1 1.5 0v5.5a.75.75 0 0 1-1.5 0ZM8 3a.75.75 0 0 1 .75.75v3.5a.75.75 0 0 1-1.5 0v-3.5A.75.75 0 0 1 8 3Z"></path> </svg> </template> <template id="pencil-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pencil"> <path d="M11.013 1.427a1.75 1.75 0 0 1 2.474 0l1.086 1.086a1.75 1.75 0 0 1 0 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 0 1-.927-.928l.929-3.25c.081-.286.235-.547.445-.758l8.61-8.61Zm.176 4.823L9.75 4.81l-6.286 6.287a.253.253 0 0 0-.064.108l-.558 1.953 1.953-.558a.253.253 0 0 0 .108-.064Zm1.238-3.763a.25.25 0 0 0-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 0 0 0-.354Z"></path> </svg> </template> <template id="copilot-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot"> <path d="M7.998 15.035c-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.201-.508-.254-1.084-.254-1.656 0-.87.128-1.769.693-2.484.579-.733 1.494-1.124 2.724-1.261 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095v1.872c0 .766-3.351 3.795-8.002 3.795Zm0-1.485c2.28 0 4.584-1.11 5.002-1.433V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-1.146 0-2.059-.327-2.71-.991A3.222 3.222 0 0 1 8 6.303a3.24 3.24 0 0 1-.544.743c-.65.664-1.563.991-2.71.991-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433ZM6.762 2.83c-.193-.206-.637-.413-1.682-.297-1.019.113-1.479.404-1.713.7-.247.312-.369.789-.369 1.554 0 .793.129 1.171.308 1.371.162.181.519.379 1.442.379.853 0 1.339-.235 1.638-.54.315-.322.527-.827.617-1.553.117-.935-.037-1.395-.241-1.614Zm4.155-.297c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Z"></path><path d="M6.25 9.037a.75.75 0 0 1 .75.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 .75-.75Zm4.25.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="copilot-error-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot-error"> <path d="M16 11.24c0 .112-.072.274-.21.467L13 9.688V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-.198 0-.388-.009-.571-.029L6.833 5.226a4.01 4.01 0 0 0 .17-.782c.117-.935-.037-1.395-.241-1.614-.193-.206-.637-.413-1.682-.297-.683.076-1.115.231-1.395.415l-1.257-.91c.579-.564 1.413-.877 2.485-.996 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095Zm-5.083-8.707c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Zm2.511 11.074c-1.393.776-3.272 1.428-5.43 1.428-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.18-.455-.241-.963-.252-1.475L.31 4.107A.747.747 0 0 1 0 3.509V3.49a.748.748 0 0 1 .625-.73c.156-.026.306.047.435.139l14.667 10.578a.592.592 0 0 1 .227.264.752.752 0 0 1 .046.249v.022a.75.75 0 0 1-1.19.596Zm-1.367-.991L5.635 7.964a5.128 5.128 0 0 1-.889.073c-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433 1.539 0 3.089-.505 4.063-.934Z"></path> </svg> </template> <template id="workflow-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-workflow"> <path d="M0 1.75C0 .784.784 0 1.75 0h3.5C6.216 0 7 .784 7 1.75v3.5A1.75 1.75 0 0 1 5.25 7H4v4a1 1 0 0 0 1 1h4v-1.25C9 9.784 9.784 9 10.75 9h3.5c.966 0 1.75.784 1.75 1.75v3.5A1.75 1.75 0 0 1 14.25 16h-3.5A1.75 1.75 0 0 1 9 14.25v-.75H5A2.5 2.5 0 0 1 2.5 11V7h-.75A1.75 1.75 0 0 1 0 5.25Zm1.75-.25a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Zm9 9a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="book-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </template> <template id="code-review-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-review"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 13H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 14.543V13H1.75A1.75 1.75 0 0 1 0 11.25v-8.5C0 1.784.784 1 1.75 1ZM1.5 2.75v8.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-8.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm5.28 1.72a.75.75 0 0 1 0 1.06L5.31 7l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.75.75 0 0 1 1.06 0Zm2.44 0a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L10.69 7 9.22 5.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="codespaces-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-codespaces"> <path d="M0 11.25c0-.966.784-1.75 1.75-1.75h12.5c.966 0 1.75.784 1.75 1.75v3A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm2-9.5C2 .784 2.784 0 3.75 0h8.5C13.216 0 14 .784 14 1.75v5a1.75 1.75 0 0 1-1.75 1.75h-8.5A1.75 1.75 0 0 1 2 6.75Zm1.75-.25a.25.25 0 0 0-.25.25v5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-5a.25.25 0 0 0-.25-.25Zm-2 9.5a.25.25 0 0 0-.25.25v3c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-3a.25.25 0 0 0-.25-.25Z"></path><path d="M7 12.75a.75.75 0 0 1 .75-.75h4.5a.75.75 0 0 1 0 1.5h-4.5a.75.75 0 0 1-.75-.75Zm-4 0a.75.75 0 0 1 .75-.75h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1-.75-.75Z"></path> </svg> </template> <template id="comment-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment"> <path d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="comment-discussion-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> </template> <template id="organization-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-organization"> <path d="M1.75 16A1.75 1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0h8.5C11.216 0 12 .784 12 1.75v12.5c0 .085-.006.168-.018.25h2.268a.25.25 0 0 0 .25-.25V8.285a.25.25 0 0 0-.111-.208l-1.055-.703a.749.749 0 1 1 .832-1.248l1.055.703c.487.325.779.871.779 1.456v5.965A1.75 1.75 0 0 1 14.25 16h-3.5a.766.766 0 0 1-.197-.026c-.099.017-.2.026-.303.026h-3a.75.75 0 0 1-.75-.75V14h-1v1.25a.75.75 0 0 1-.75.75Zm-.25-1.75c0 .138.112.25.25.25H4v-1.25a.75.75 0 0 1 .75-.75h2.5a.75.75 0 0 1 .75.75v1.25h2.25a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM3.75 6h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 3.75A.75.75 0 0 1 3.75 3h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 3.75Zm4 3A.75.75 0 0 1 7.75 6h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 7 6.75ZM7.75 3h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 9.75A.75.75 0 0 1 3.75 9h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 9.75ZM7.75 9h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="rocket-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-rocket"> <path d="M14.064 0h.186C15.216 0 16 .784 16 1.75v.186a8.752 8.752 0 0 1-2.564 6.186l-.458.459c-.314.314-.641.616-.979.904v3.207c0 .608-.315 1.172-.833 1.49l-2.774 1.707a.749.749 0 0 1-1.11-.418l-.954-3.102a1.214 1.214 0 0 1-.145-.125L3.754 9.816a1.218 1.218 0 0 1-.124-.145L.528 8.717a.749.749 0 0 1-.418-1.11l1.71-2.774A1.748 1.748 0 0 1 3.31 4h3.204c.288-.338.59-.665.904-.979l.459-.458A8.749 8.749 0 0 1 14.064 0ZM8.938 3.623h-.002l-.458.458c-.76.76-1.437 1.598-2.02 2.5l-1.5 2.317 2.143 2.143 2.317-1.5c.902-.583 1.74-1.26 2.499-2.02l.459-.458a7.25 7.25 0 0 0 2.123-5.127V1.75a.25.25 0 0 0-.25-.25h-.186a7.249 7.249 0 0 0-5.125 2.123ZM3.56 14.56c-.732.732-2.334 1.045-3.005 1.148a.234.234 0 0 1-.201-.064.234.234 0 0 1-.064-.201c.103-.671.416-2.273 1.15-3.003a1.502 1.502 0 1 1 2.12 2.12Zm6.94-3.935c-.088.06-.177.118-.266.175l-2.35 1.521.548 1.783 1.949-1.2a.25.25 0 0 0 .119-.213ZM3.678 8.116 5.2 5.766c.058-.09.117-.178.176-.266H3.309a.25.25 0 0 0-.213.119l-1.2 1.95ZM12 5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </template> <template id="shield-check-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield-check"> <path d="m8.533.133 5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667l5.25-1.68a1.748 1.748 0 0 1 1.066 0Zm-.61 1.429.001.001-5.25 1.68a.251.251 0 0 0-.174.237V7c0 1.36.275 2.666 1.057 3.859.784 1.194 2.121 2.342 4.366 3.298a.196.196 0 0 0 .154 0c2.245-.957 3.582-2.103 4.366-3.297C13.225 9.666 13.5 8.358 13.5 7V3.48a.25.25 0 0 0-.174-.238l-5.25-1.68a.25.25 0 0 0-.153 0ZM11.28 6.28l-3.5 3.5a.75.75 0 0 1-1.06 0l-1.5-1.5a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l.97.97 2.97-2.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="heart-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-heart"> <path d="m8 14.25.345.666a.75.75 0 0 1-.69 0l-.008-.004-.018-.01a7.152 7.152 0 0 1-.31-.17 22.055 22.055 0 0 1-3.434-2.414C2.045 10.731 0 8.35 0 5.5 0 2.836 2.086 1 4.25 1 5.797 1 7.153 1.802 8 3.02 8.847 1.802 10.203 1 11.75 1 13.914 1 16 2.836 16 5.5c0 2.85-2.045 5.231-3.885 6.818a22.066 22.066 0 0 1-3.744 2.584l-.018.01-.006.003h-.002ZM4.25 2.5c-1.336 0-2.75 1.164-2.75 3 0 2.15 1.58 4.144 3.365 5.682A20.58 20.58 0 0 0 8 13.393a20.58 20.58 0 0 0 3.135-2.211C12.92 9.644 14.5 7.65 14.5 5.5c0-1.836-1.414-3-2.75-3-1.373 0-2.609.986-3.029 2.456a.749.749 0 0 1-1.442 0C6.859 3.486 5.623 2.5 4.25 2.5Z"></path> </svg> </template> <template id="server-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-server"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v4c0 .372-.116.717-.314 1 .198.283.314.628.314 1v4a1.75 1.75 0 0 1-1.75 1.75H1.75A1.75 1.75 0 0 1 0 12.75v-4c0-.358.109-.707.314-1a1.739 1.739 0 0 1-.314-1v-4C0 1.784.784 1 1.75 1ZM1.5 2.75v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm.25 5.75a.25.25 0 0 0-.25.25v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25ZM7 4.75A.75.75 0 0 1 7.75 4h4.5a.75.75 0 0 1 0 1.5h-4.5A.75.75 0 0 1 7 4.75ZM7.75 10h4.5a.75.75 0 0 1 0 1.5h-4.5a.75.75 0 0 1 0-1.5ZM3 4.75A.75.75 0 0 1 3.75 4h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 4.75ZM3.75 10h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="globe-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-globe"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM5.78 8.75a9.64 9.64 0 0 0 1.363 4.177c.255.426.542.832.857 1.215.245-.296.551-.705.857-1.215A9.64 9.64 0 0 0 10.22 8.75Zm4.44-1.5a9.64 9.64 0 0 0-1.363-4.177c-.307-.51-.612-.919-.857-1.215a9.927 9.927 0 0 0-.857 1.215A9.64 9.64 0 0 0 5.78 7.25Zm-5.944 1.5H1.543a6.507 6.507 0 0 0 4.666 5.5c-.123-.181-.24-.365-.352-.552-.715-1.192-1.437-2.874-1.581-4.948Zm-2.733-1.5h2.733c.144-2.074.866-3.756 1.58-4.948.12-.197.237-.381.353-.552a6.507 6.507 0 0 0-4.666 5.5Zm10.181 1.5c-.144 2.074-.866 3.756-1.58 4.948-.12.197-.237.381-.353.552a6.507 6.507 0 0 0 4.666-5.5Zm2.733-1.5a6.507 6.507 0 0 0-4.666-5.5c.123.181.24.365.353.552.714 1.192 1.436 2.874 1.58 4.948Z"></path> </svg> </template> <template id="issue-opened-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </template> <template id="device-mobile-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-mobile"> <path d="M3.75 0h8.5C13.216 0 14 .784 14 1.75v12.5A1.75 1.75 0 0 1 12.25 16h-8.5A1.75 1.75 0 0 1 2 14.25V1.75C2 .784 2.784 0 3.75 0ZM3.5 1.75v12.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM8 13a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path> </svg> </template> <template id="package-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-package"> <path d="m8.878.392 5.25 3.045c.54.314.872.89.872 1.514v6.098a1.75 1.75 0 0 1-.872 1.514l-5.25 3.045a1.75 1.75 0 0 1-1.756 0l-5.25-3.045A1.75 1.75 0 0 1 1 11.049V4.951c0-.624.332-1.201.872-1.514L7.122.392a1.75 1.75 0 0 1 1.756 0ZM7.875 1.69l-4.63 2.685L8 7.133l4.755-2.758-4.63-2.685a.248.248 0 0 0-.25 0ZM2.5 5.677v5.372c0 .09.047.171.125.216l4.625 2.683V8.432Zm6.25 8.271 4.625-2.683a.25.25 0 0 0 .125-.216V5.677L8.75 8.432Z"></path> </svg> </template> <template id="credit-card-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-credit-card"> <path d="M10.75 9a.75.75 0 0 0 0 1.5h1.5a.75.75 0 0 0 0-1.5h-1.5Z"></path><path d="M0 3.75C0 2.784.784 2 1.75 2h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 14H1.75A1.75 1.75 0 0 1 0 12.25ZM14.5 6.5h-13v5.75c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25Zm0-2.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25V5h13Z"></path> </svg> </template> <template id="play-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </template> <template id="gift-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-gift"> <path d="M2 2.75A2.75 2.75 0 0 1 4.75 0c.983 0 1.873.42 2.57 1.232.268.318.497.668.68 1.042.183-.375.411-.725.68-1.044C9.376.42 10.266 0 11.25 0a2.75 2.75 0 0 1 2.45 4h.55c.966 0 1.75.784 1.75 1.75v2c0 .698-.409 1.301-1 1.582v4.918A1.75 1.75 0 0 1 13.25 16H2.75A1.75 1.75 0 0 1 1 14.25V9.332C.409 9.05 0 8.448 0 7.75v-2C0 4.784.784 4 1.75 4h.55c-.192-.375-.3-.8-.3-1.25ZM7.25 9.5H2.5v4.75c0 .138.112.25.25.25h4.5Zm1.5 0v5h4.5a.25.25 0 0 0 .25-.25V9.5Zm0-4V8h5.5a.25.25 0 0 0 .25-.25v-2a.25.25 0 0 0-.25-.25Zm-7 0a.25.25 0 0 0-.25.25v2c0 .138.112.25.25.25h5.5V5.5h-5.5Zm3-4a1.25 1.25 0 0 0 0 2.5h2.309c-.233-.818-.542-1.401-.878-1.793-.43-.502-.915-.707-1.431-.707ZM8.941 4h2.309a1.25 1.25 0 0 0 0-2.5c-.516 0-1 .205-1.43.707-.337.392-.646.975-.879 1.793Z"></path> </svg> </template> <template id="code-square-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-square"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25Zm7.47 3.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L10.69 8 9.22 6.53a.75.75 0 0 1 0-1.06ZM6.78 6.53 5.31 8l1.47 1.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="device-desktop-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-desktop"> <path d="M14.25 1c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 14.25 12h-3.727c.099 1.041.52 1.872 1.292 2.757A.752.752 0 0 1 11.25 16h-6.5a.75.75 0 0 1-.565-1.243c.772-.885 1.192-1.716 1.292-2.757H1.75A1.75 1.75 0 0 1 0 10.25v-7.5C0 1.784.784 1 1.75 1ZM1.75 2.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25ZM9.018 12H6.982a5.72 5.72 0 0 1-.765 2.5h3.566a5.72 5.72 0 0 1-.765-2.5Z"></path> </svg> </template> <div class="position-relative"> <ul role="listbox" class="ActionListWrap QueryBuilder-ListWrap" aria-label="Suggestions" data-action=" combobox-commit:query-builder#comboboxCommit mousedown:query-builder#resultsMousedown " data-target="query-builder.resultsList" data-persist-list=false id="query-builder-test-results" ></ul> </div> <div class="FormControl-inlineValidation" id="validation-d7785cb5-7f88-432d-a1bf-d533dacddf8b" hidden="hidden"> <span class="FormControl-inlineValidation--visual"> <svg aria-hidden="true" height="12" viewBox="0 0 12 12" version="1.1" width="12" data-view-component="true" class="octicon octicon-alert-fill"> <path d="M4.855.708c.5-.896 1.79-.896 2.29 0l4.675 8.351a1.312 1.312 0 0 1-1.146 1.954H1.33A1.313 1.313 0 0 1 .183 9.058ZM7 7V3H5v4Zm-1 3a1 1 0 1 0 0-2 1 1 0 0 0 0 2Z"></path> </svg> </span> <span></span> </div> </div> <div data-target="query-builder.screenReaderFeedback" aria-live="polite" aria-atomic="true" class="sr-only"></div> </query-builder></form> <div class="d-flex flex-row color-fg-muted px-3 text-small color-bg-default search-feedback-prompt"> <a target="_blank" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax" data-view-component="true" class="Link color-fg-accent text-normal ml-2">Search syntax tips</a> <div class="d-flex flex-1"></div> </div> </div> </div> </div> </modal-dialog></div> </div> <div data-action="click:qbsearch-input#retract" class="dark-backdrop position-fixed" hidden data-target="qbsearch-input.darkBackdrop"></div> <div class="color-fg-default"> <dialog-helper> <dialog data-target="qbsearch-input.feedbackDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="feedback-dialog" aria-modal="true" aria-labelledby="feedback-dialog-title" aria-describedby="feedback-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header"> <div class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="feedback-dialog-title"> Provide feedback </h1> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="feedback-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="feedback-dialog-title"> <div data-view-component="true" class="Overlay-body"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="code-search-feedback-form" data-turbo="false" action="/search/feedback" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="7BLcBUXk/WhYQsy08smNDLqjNLTnV8B0K79FUqeByPtsLAbcRQ/CsOyDfdhZNJ1a1GnrioR1Bf5KEhZQc74O8Q==" /> <p>We read every piece of feedback, and take your input very seriously.</p> <textarea name="feedback" class="form-control width-full mb-2" style="height: 120px" id="feedback"></textarea> <input name="include_email" id="include_email" aria-label="Include my email address so I can be contacted" class="form-control mr-2" type="checkbox"> <label for="include_email" style="font-weight: normal">Include my email address so I can be contacted</label> </form></div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd"> <button data-close-dialog-id="feedback-dialog" type="button" data-view-component="true" class="btn"> Cancel </button> <button form="code-search-feedback-form" data-action="click:qbsearch-input#submitFeedback" type="submit" data-view-component="true" class="btn-primary btn"> Submit feedback </button> </div> </dialog></dialog-helper> <custom-scopes data-target="qbsearch-input.customScopesManager"> <dialog-helper> <dialog data-target="custom-scopes.customScopesModalDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="custom-scopes-dialog" aria-modal="true" aria-labelledby="custom-scopes-dialog-title" aria-describedby="custom-scopes-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header Overlay-header--divided"> <div class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="custom-scopes-dialog-title"> Saved searches </h1> <h2 id="custom-scopes-dialog-description" class="Overlay-description">Use saved searches to filter your results more quickly</h2> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="custom-scopes-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="custom-scopes-dialog-title"> <div data-view-component="true" class="Overlay-body"> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <div hidden class="create-custom-scope-form" data-target="custom-scopes.createCustomScopeForm"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="custom-scopes-dialog-form" data-turbo="false" action="/search/custom_scopes" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="MMOj+Aq4ff1rThxQIA6krBJfjkHd9c7m7ZXtli3uvU+kzOi2XqyuuOZn3YPDlqCL1mxGfPqG5pMP6tQrevG4Zw==" /> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <input type="hidden" id="custom_scope_id" name="custom_scope_id" data-target="custom-scopes.customScopesIdField"> <div class="form-group"> <label for="custom_scope_name">Name</label> <auto-check src="/search/custom_scopes/check_name" required only-validate-on-blur="false"> <input type="text" name="custom_scope_name" id="custom_scope_name" data-target="custom-scopes.customScopesNameField" class="form-control" autocomplete="off" placeholder="github-ruby" required maxlength="50"> <input type="hidden" data-csrf="true" value="5GFGkTa3tUjmLUTdTkLhLe91XFqf4vdbJzSs/qjRlgVsl8/GChvBjy77uz1OItGgQ4ADTM+H1We0xNbkkCtSPQ==" /> </auto-check> </div> <div class="form-group"> <label for="custom_scope_query">Query</label> <input type="text" name="custom_scope_query" id="custom_scope_query" data-target="custom-scopes.customScopesQueryField" class="form-control" autocomplete="off" placeholder="(repo:mona/a OR repo:mona/b) AND lang:python" required maxlength="500"> </div> <p class="text-small color-fg-muted"> To see all available qualifiers, see our <a class="Link--inTextBlock" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax">documentation</a>. </p> </form> </div> <div data-target="custom-scopes.manageCustomScopesForm"> <div data-target="custom-scopes.list"></div> </div> </div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd Overlay-footer--divided"> <button data-action="click:custom-scopes#customScopesCancel" type="button" data-view-component="true" class="btn"> Cancel </button> <button form="custom-scopes-dialog-form" data-action="click:custom-scopes#customScopesSubmit" data-target="custom-scopes.customScopesSubmitButton" type="submit" data-view-component="true" class="btn-primary btn"> Create saved search </button> </div> </dialog></dialog-helper> </custom-scopes> </div> </qbsearch-input> <div class="position-relative HeaderMenu-link-wrap d-lg-inline-block"> <a href="/login?return_to=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass" class="HeaderMenu-link HeaderMenu-link--sign-in HeaderMenu-button flex-shrink-0 no-underline d-none d-lg-inline-flex border border-lg-0 rounded rounded-lg-0 px-2 py-1" style="margin-left: 12px;" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="d813b087105eb5c9241e62c1dc7cbf47960232874207f670836baca8e119fb52" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to go to homepage&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Sign in;ref_loc:Header&quot;}" > Sign in </a> </div> <a href="/signup?ref_cta=Sign+up&amp;ref_loc=header+logged+out&amp;ref_page=%2F%3Cuser-name%3E%2F%3Crepo-name%3E&amp;source=header-repo&amp;source_repo=NVIDIA%2Fcutlass" class="HeaderMenu-link HeaderMenu-link--sign-up HeaderMenu-button flex-shrink-0 d-flex d-lg-inline-flex no-underline border color-border-default rounded px-2 py-1" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="d813b087105eb5c9241e62c1dc7cbf47960232874207f670836baca8e119fb52" data-analytics-event="{&quot;category&quot;:&quot;Sign up&quot;,&quot;action&quot;:&quot;click to sign up for account&quot;,&quot;label&quot;:&quot;ref_page:/&lt;user-name&gt;/&lt;repo-name&gt;;ref_cta:Sign up;ref_loc:header logged out&quot;}" > Sign up </a> <button type="button" class="sr-only js-header-menu-focus-trap d-block d-lg-none">Reseting focus</button> </div> </div> </div> </div> </header> <div hidden="hidden" data-view-component="true" class="js-stale-session-flash stale-session-flash flash flash-warn flash-full"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-alert"> <path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575Zm1.763.707a.25.25 0 0 0-.44 0L1.698 13.132a.25.25 0 0 0 .22.368h12.164a.25.25 0 0 0 .22-.368Zm.53 3.996v2.5a.75.75 0 0 1-1.5 0v-2.5a.75.75 0 0 1 1.5 0ZM9 11a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span class="js-stale-session-flash-signed-in" hidden>You signed in with another tab or window. <a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <span class="js-stale-session-flash-signed-out" hidden>You signed out in another tab or window. <a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <span class="js-stale-session-flash-switched" hidden>You switched accounts on another tab or window. <a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <button id="icon-button-3b706f22-1dd6-4019-9705-f89fb248790a" aria-labelledby="tooltip-81af65f2-76c6-4f8b-9a3f-eb5e0da215ea" type="button" data-view-component="true" class="Button Button--iconOnly Button--invisible Button--medium flash-close js-flash-close"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x Button-visual"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button><tool-tip id="tooltip-81af65f2-76c6-4f8b-9a3f-eb5e0da215ea" for="icon-button-3b706f22-1dd6-4019-9705-f89fb248790a" popover="manual" data-direction="s" data-type="label" data-view-component="true" class="sr-only position-absolute">Dismiss alert</tool-tip> </div> </div> <div id="start-of-content" class="show-on-focus"></div> <div id="js-flash-container" class="flash-container" data-turbo-replace> <template class="js-flash-template"> <div class="flash flash-full {{ className }}"> <div > <button autofocus class="flash-close js-flash-close" type="button" aria-label="Dismiss this message"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> <div aria-atomic="true" role="alert" class="js-flash-alert"> <div>{{ message }}</div> </div> </div> </div> </template> </div> <div class="application-main " data-commit-hovercards-enabled data-discussion-hovercards-enabled data-issue-and-pr-hovercards-enabled data-project-hovercards-enabled > <div itemscope itemtype="http://schema.org/SoftwareSourceCode" class=""> <main id="js-repo-pjax-container" > <div id="repository-container-header" class="pt-3 hide-full-screen" style="background-color: var(--page-header-bgColor, var(--color-page-header-bg));" data-turbo-replace> <div class="d-flex flex-nowrap flex-justify-end mb-3 px-3 px-lg-5" style="gap: 1rem;"> <div class="flex-auto min-width-0 width-fit"> <div class=" d-flex flex-wrap flex-items-center wb-break-word f3 text-normal"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo color-fg-muted mr-2"> <path d="M2 2.5A2.5 2.5 0 0 1 4.5 0h8.75a.75.75 0 0 1 .75.75v12.5a.75.75 0 0 1-.75.75h-2.5a.75.75 0 0 1 0-1.5h1.75v-2h-8a1 1 0 0 0-.714 1.7.75.75 0 1 1-1.072 1.05A2.495 2.495 0 0 1 2 11.5Zm10.5-1h-8a1 1 0 0 0-1 1v6.708A2.486 2.486 0 0 1 4.5 9h8ZM5 12.25a.25.25 0 0 1 .25-.25h3.5a.25.25 0 0 1 .25.25v3.25a.25.25 0 0 1-.4.2l-1.45-1.087a.249.249 0 0 0-.3 0L5.4 15.7a.25.25 0 0 1-.4-.2Z"></path> </svg> <span class="author flex-self-stretch" itemprop="author"> <a class="url fn" rel="author" data-hovercard-type="organization" data-hovercard-url="/orgs/NVIDIA/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" href="/NVIDIA"> NVIDIA </a> </span> <span class="mx-1 flex-self-stretch color-fg-muted">/</span> <strong itemprop="name" class="mr-2 flex-self-stretch"> <a data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" href="/NVIDIA/cutlass">cutlass</a> </strong> <span></span><span class="Label Label--secondary v-align-middle mr-1">Public</span> </div> </div> <div id="repository-details-container" class="flex-shrink-0" data-turbo-replace style="max-width: 70%;"> <ul class="pagehead-actions flex-shrink-0 d-none d-md-inline" style="padding: 2px 0;"> <li> <a href="/login?return_to=%2FNVIDIA%2Fcutlass" rel="nofollow" id="repository-details-watch-button" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;notification subscription menu watch&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="85f0d564b4cea7e2900eda4d7b3f08d82f1381ae1125e9cdc5e876b1bbcfac86" aria-label="You must be signed in to change notification settings" data-view-component="true" class="btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bell mr-2"> <path d="M8 16a2 2 0 0 0 1.985-1.75c.017-.137-.097-.25-.235-.25h-3.5c-.138 0-.252.113-.235.25A2 2 0 0 0 8 16ZM3 5a5 5 0 0 1 10 0v2.947c0 .05.015.098.042.139l1.703 2.555A1.519 1.519 0 0 1 13.482 13H2.518a1.516 1.516 0 0 1-1.263-2.36l1.703-2.554A.255.255 0 0 0 3 7.947Zm5-3.5A3.5 3.5 0 0 0 4.5 5v2.947c0 .346-.102.683-.294.97l-1.703 2.556a.017.017 0 0 0-.003.01l.001.006c0 .002.002.004.004.006l.006.004.007.001h10.964l.007-.001.006-.004.004-.006.001-.007a.017.017 0 0 0-.003-.01l-1.703-2.554a1.745 1.745 0 0 1-.294-.97V5A3.5 3.5 0 0 0 8 1.5Z"></path> </svg>Notifications </a> <tool-tip id="tooltip-248e66f3-9f4d-42b3-9aeb-302b919fa1bb" for="repository-details-watch-button" popover="manual" data-direction="s" data-type="description" data-view-component="true" class="sr-only position-absolute">You must be signed in to change notification settings</tool-tip> </li> <li> <a icon="repo-forked" id="fork-button" href="/login?return_to=%2FNVIDIA%2Fcutlass" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;repo details fork button&quot;,&quot;repository_id&quot;:112542515,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="14f774d1d4978b69dede786f7dc5d446dad68c588197586ecd7ccaa852e793d7" data-view-component="true" class="btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-2"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg>Fork <span id="repo-network-counter" data-pjax-replace="true" data-turbo-replace="true" title="1,070" data-view-component="true" class="Counter">1.1k</span> </a> </li> <li> <div data-view-component="true" class="BtnGroup d-flex"> <a href="/login?return_to=%2FNVIDIA%2Fcutlass" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;star button&quot;,&quot;repository_id&quot;:112542515,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="87aa6f680e447962571bf4f4fa9c3468c5065feac16c80c6fcf690be245aa068" aria-label="You must be signed in to star a repository" data-view-component="true" class="tooltipped tooltipped-sw btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star v-align-text-bottom d-inline-block mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg><span data-view-component="true" class="d-inline"> Star </span> <span id="repo-stars-counter-star" aria-label="6229 users starred this repository" data-singular-suffix="user starred this repository" data-plural-suffix="users starred this repository" data-turbo-replace="true" title="6,229" data-view-component="true" class="Counter js-social-count">6.2k</span> </a></div> </li> </ul> </div> </div> <div id="responsive-meta-container" data-turbo-replace> <div class="d-block d-md-none mb-2 px-3 px-md-4 px-lg-5"> <p class="f4 mb-3 "> CUDA Templates for Linear Algebra Subroutines </p> <h3 class="sr-only">License</h3> <div class="mb-2"> <a href="/NVIDIA/cutlass/blob/main/LICENSE.txt" class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:license&quot;}" > <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-law mr-2"> <path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path> </svg> View license </a> </div> <div class="mb-3"> <a class="Link--secondary no-underline mr-3" href="/NVIDIA/cutlass/stargazers"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star mr-1"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg> <span class="text-bold">6.2k</span> stars </a> <a class="Link--secondary no-underline mr-3" href="/NVIDIA/cutlass/forks"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-1"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg> <span class="text-bold">1.1k</span> forks </a> <a class="Link--secondary no-underline mr-3 d-inline-block" href="/NVIDIA/cutlass/branches"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-branch mr-1"> <path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path> </svg> <span>Branches</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/NVIDIA/cutlass/tags"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-tag mr-1"> <path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path> </svg> <span>Tags</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/NVIDIA/cutlass/activity"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pulse mr-1"> <path d="M6 2c.306 0 .582.187.696.471L10 10.731l1.304-3.26A.751.751 0 0 1 12 7h3.25a.75.75 0 0 1 0 1.5h-2.742l-1.812 4.528a.751.751 0 0 1-1.392 0L6 4.77 4.696 8.03A.75.75 0 0 1 4 8.5H.75a.75.75 0 0 1 0-1.5h2.742l1.812-4.529A.751.751 0 0 1 6 2Z"></path> </svg> <span>Activity</span> </a> </div> <div class="d-flex flex-wrap gap-2"> <div class="flex-1"> <div data-view-component="true" class="BtnGroup d-flex"> <a href="/login?return_to=%2FNVIDIA%2Fcutlass" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;star button&quot;,&quot;repository_id&quot;:112542515,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="87aa6f680e447962571bf4f4fa9c3468c5065feac16c80c6fcf690be245aa068" aria-label="You must be signed in to star a repository" data-view-component="true" class="tooltipped tooltipped-sw btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star v-align-text-bottom d-inline-block mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg><span data-view-component="true" class="d-inline"> Star </span> </a></div> </div> <div class="flex-1"> <a href="/login?return_to=%2FNVIDIA%2Fcutlass" rel="nofollow" id="files-overview-watch-button" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;notification subscription menu watch&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/NVIDIA/cutlass&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="85f0d564b4cea7e2900eda4d7b3f08d82f1381ae1125e9cdc5e876b1bbcfac86" aria-label="You must be signed in to change notification settings" data-view-component="true" class="btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bell mr-2"> <path d="M8 16a2 2 0 0 0 1.985-1.75c.017-.137-.097-.25-.235-.25h-3.5c-.138 0-.252.113-.235.25A2 2 0 0 0 8 16ZM3 5a5 5 0 0 1 10 0v2.947c0 .05.015.098.042.139l1.703 2.555A1.519 1.519 0 0 1 13.482 13H2.518a1.516 1.516 0 0 1-1.263-2.36l1.703-2.554A.255.255 0 0 0 3 7.947Zm5-3.5A3.5 3.5 0 0 0 4.5 5v2.947c0 .346-.102.683-.294.97l-1.703 2.556a.017.017 0 0 0-.003.01l.001.006c0 .002.002.004.004.006l.006.004.007.001h10.964l.007-.001.006-.004.004-.006.001-.007a.017.017 0 0 0-.003-.01l-1.703-2.554a1.745 1.745 0 0 1-.294-.97V5A3.5 3.5 0 0 0 8 1.5Z"></path> </svg>Notifications </a> <tool-tip id="tooltip-20a472b4-685c-4fad-8e22-9c623c607b7f" for="files-overview-watch-button" popover="manual" data-direction="s" data-type="description" data-view-component="true" class="sr-only position-absolute">You must be signed in to change notification settings</tool-tip> </div> <span> </span> </div> </div> </div> <nav data-pjax="#js-repo-pjax-container" aria-label="Repository" data-view-component="true" class="js-repo-nav js-sidenav-container-pjax js-responsive-underlinenav overflow-hidden UnderlineNav px-3 px-md-4 px-lg-5"> <ul data-view-component="true" class="UnderlineNav-body list-style-none"> <li data-view-component="true" class="d-inline-flex"> <a id="code-tab" href="/NVIDIA/cutlass" data-tab-item="i0code-tab" data-selected-links="repo_source repo_downloads repo_commits repo_releases repo_tags repo_branches repo_packages repo_deployments repo_attestations /NVIDIA/cutlass" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g c" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Code&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" aria-current="page" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item selected"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code UnderlineNav-octicon d-none d-sm-inline"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> <span data-content="Code">Code</span> <span id="code-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="issues-tab" href="/NVIDIA/cutlass/issues" data-tab-item="i1issues-tab" data-selected-links="repo_issues repo_labels repo_milestones /NVIDIA/cutlass/issues" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g i" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Issues&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> <span data-content="Issues">Issues</span> <span id="issues-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="227" data-view-component="true" class="Counter">227</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="pull-requests-tab" href="/NVIDIA/cutlass/pulls" data-tab-item="i2pull-requests-tab" data-selected-links="repo_pulls checks /NVIDIA/cutlass/pulls" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g p" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Pull requests&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> <span data-content="Pull requests">Pull requests</span> <span id="pull-requests-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="31" data-view-component="true" class="Counter">31</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="discussions-tab" href="/NVIDIA/cutlass/discussions" data-tab-item="i3discussions-tab" data-selected-links="repo_discussions /NVIDIA/cutlass/discussions" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g g" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Discussions&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> <span data-content="Discussions">Discussions</span> <span id="discussions-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="actions-tab" href="/NVIDIA/cutlass/actions" data-tab-item="i4actions-tab" data-selected-links="repo_actions /NVIDIA/cutlass/actions" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g a" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Actions&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> <span data-content="Actions">Actions</span> <span id="actions-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="projects-tab" href="/NVIDIA/cutlass/projects" data-tab-item="i5projects-tab" data-selected-links="repo_projects new_repo_project repo_project /NVIDIA/cutlass/projects" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g b" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Projects&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> <span data-content="Projects">Projects</span> <span id="projects-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="0" hidden="hidden" data-view-component="true" class="Counter">0</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="wiki-tab" href="/NVIDIA/cutlass/wiki" data-tab-item="i6wiki-tab" data-selected-links="repo_wiki /NVIDIA/cutlass/wiki" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g w" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Wiki&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> <span data-content="Wiki">Wiki</span> <span id="wiki-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="security-tab" href="/NVIDIA/cutlass/security" data-tab-item="i7security-tab" data-selected-links="security overview alerts policy token_scanning code_scanning /NVIDIA/cutlass/security" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g s" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Security&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield UnderlineNav-octicon d-none d-sm-inline"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 4.75v3a.75.75 0 0 1-1.5 0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span data-content="Security">Security</span> <include-fragment src="/NVIDIA/cutlass/security/overall-count" accept="text/fragment+html"></include-fragment> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="insights-tab" href="/NVIDIA/cutlass/pulse" data-tab-item="i8insights-tab" data-selected-links="repo_graphs repo_contributors dependency_graph dependabot_updates pulse people community /NVIDIA/cutlass/pulse" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Insights&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> <span data-content="Insights">Insights</span> <span id="insights-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> </ul> <div style="visibility:hidden;" data-view-component="true" class="UnderlineNav-actions js-responsive-underlinenav-overflow position-absolute pr-3 pr-md-4 pr-lg-5 right-0"> <action-menu data-select-variant="none" data-view-component="true"> <focus-group direction="vertical" mnemonics retain> <button id="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-button" popovertarget="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-overlay" aria-controls="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-list" aria-haspopup="true" aria-labelledby="tooltip-2c22fc87-2efb-481b-892f-4a20a14117d3" type="button" data-view-component="true" class="Button Button--iconOnly Button--secondary Button--medium UnderlineNav-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-kebab-horizontal Button-visual"> <path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path> </svg> </button><tool-tip id="tooltip-2c22fc87-2efb-481b-892f-4a20a14117d3" for="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-button" popover="manual" data-direction="s" data-type="label" data-view-component="true" class="sr-only position-absolute">Additional navigation options</tool-tip> <anchored-position data-target="action-menu.overlay" id="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-overlay" anchor="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-button" align="start" side="outside-bottom" anchor-offset="normal" popover="auto" data-view-component="true"> <div data-view-component="true" class="Overlay Overlay--size-auto"> <div data-view-component="true" class="Overlay-body Overlay-body--paddingNone"> <action-list> <div data-view-component="true"> <ul aria-labelledby="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-button" id="action-menu-acabb6de-ba4d-47ea-8ee2-f864a8c3ed47-list" role="menu" data-view-component="true" class="ActionListWrap--inset ActionListWrap"> <li hidden="hidden" data-menu-item="i0code-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-74ce1e1b-bc7e-4865-975f-5bd52feed739" href="/NVIDIA/cutlass" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Code </span> </a> </li> <li hidden="hidden" data-menu-item="i1issues-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-23759fb8-1c67-4fad-a1bd-9debf9b6c4cf" href="/NVIDIA/cutlass/issues" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Issues </span> </a> </li> <li hidden="hidden" data-menu-item="i2pull-requests-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-90aa76c3-72b9-41a4-8cec-00e312cb16b1" href="/NVIDIA/cutlass/pulls" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Pull requests </span> </a> </li> <li hidden="hidden" data-menu-item="i3discussions-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-7d129371-3da7-4696-9ccc-2f7ff302f823" href="/NVIDIA/cutlass/discussions" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Discussions </span> </a> </li> <li hidden="hidden" data-menu-item="i4actions-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-bc34fa0c-6932-42cc-83e7-f4a2c8ff4783" href="/NVIDIA/cutlass/actions" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Actions </span> </a> </li> <li hidden="hidden" data-menu-item="i5projects-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-167b3938-b41e-453e-a139-5766a19e24fb" href="/NVIDIA/cutlass/projects" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Projects </span> </a> </li> <li hidden="hidden" data-menu-item="i6wiki-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-81909b64-8508-4691-9cef-9e6183e0925b" href="/NVIDIA/cutlass/wiki" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Wiki </span> </a> </li> <li hidden="hidden" data-menu-item="i7security-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-750ae0da-994c-47fa-bbbf-d4861dcd6cf3" href="/NVIDIA/cutlass/security" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 4.75v3a.75.75 0 0 1-1.5 0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Security </span> </a> </li> <li hidden="hidden" data-menu-item="i8insights-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-1bada728-b234-4b70-8e62-06b604db0846" href="/NVIDIA/cutlass/pulse" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Insights </span> </a> </li> </ul> </div></action-list> </div> </div></anchored-position> </focus-group> </action-menu></div> </nav> </div> <turbo-frame id="repo-content-turbo-frame" target="_top" data-turbo-action="advance" class=""> <div id="repo-content-pjax-container" class="repository-content " > <h1 class='sr-only'>NVIDIA/cutlass</h1> <div class="clearfix container-xl px-md-4 px-lg-5 px-3"> <div> <div style="max-width: 100%" data-view-component="true" class="Layout Layout--flowRow-until-md react-repos-overview-margin Layout--sidebarPosition-end Layout--sidebarPosition-flowRow-end"> <div data-view-component="true" class="Layout-main"> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_dompurify_dist_purify_js-b89b98661809.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_tanstack_query-core_build_modern_queryObserver_js-node_modules_tanstack_-defd52-843b41414e0e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_hydro-analytics-client_dist_analytics-client_js-node_modules_gith-853b24-f2006d2a5b98.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_aria-live_aria-live_ts-ui_packages_promise-with-resolvers-polyfill_promise-with-r-17c672-d6b5ea82572a.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_paths_index_ts-9c4436ef49de.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_ref-selector_RefSelector_tsx-2cce17df147b.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_code-view-shared_hooks_use-canonical-object_ts-ui_packages_code-view-shared_hooks-a6859a-09c7f754ea79.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/repos-overview-d88ea0b31285.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.5a0ffaf77c0db0d0dac2.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/repos-overview.32a87dc4587d56dcf1eb.module.css" /> <react-partial partial-name="repos-overview" data-ssr="true" data-attempted-ssr="true" > <script type="application/json" data-target="react-partial.embeddedData">{"props":{"initialPayload":{"allShortcutsEnabled":false,"path":"/","repo":{"id":112542515,"defaultBranch":"main","name":"cutlass","ownerLogin":"NVIDIA","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2017-11-30T00:11:24.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/1728152?v=4","public":true,"private":false,"isOrgOwned":true},"currentUser":null,"refInfo":{"name":"main","listCacheKey":"v0:1738104545.0","canEdit":false,"refType":"branch","currentOid":"e9627ce55b42fd2599f58cd4396da9380954def0"},"tree":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"examples","path":"examples","contentType":"directory"},{"name":"include","path":"include","contentType":"directory"},{"name":"media","path":"media","contentType":"directory"},{"name":"python","path":"python","contentType":"directory"},{"name":"test","path":"test","contentType":"directory"},{"name":"tools","path":"tools","contentType":"directory"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":"ACTIVE_DEVELOPERS.md","path":"ACTIVE_DEVELOPERS.md","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CITATION.cff","path":"CITATION.cff","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CUDA.cmake","path":"CUDA.cmake","contentType":"file"},{"name":"Doxyfile","path":"Doxyfile","contentType":"file"},{"name":"LICENSE.txt","path":"LICENSE.txt","contentType":"file"},{"name":"PUBLICATIONS.md","path":"PUBLICATIONS.md","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"bin2hex.cmake","path":"bin2hex.cmake","contentType":"file"},{"name":"cuBLAS.cmake","path":"cuBLAS.cmake","contentType":"file"},{"name":"cuDNN.cmake","path":"cuDNN.cmake","contentType":"file"},{"name":"customConfigs.cmake","path":"customConfigs.cmake","contentType":"file"},{"name":"pyproject.toml","path":"pyproject.toml","contentType":"file"},{"name":"setup.cfg","path":"setup.cfg","contentType":"file"}],"templateDirectorySuggestionUrl":null,"readme":null,"totalCount":26,"showBranchInfobar":false},"fileTree":null,"fileTreeProcessingTime":null,"foldersToFetch":[],"treeExpanded":false,"symbolsExpanded":false,"isOverview":true,"overview":{"banners":{"shouldRecommendReadme":false,"isPersonalRepo":false,"showUseActionBanner":false,"actionSlug":null,"actionId":null,"showProtectBranchBanner":false,"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_repo","releasePath":"/NVIDIA/cutlass/releases/new?marketplace=true","showPublishActionBanner":false},"interactionLimitBanner":null,"showInvitationBanner":false,"inviterName":null,"actionsMigrationBannerInfo":{"releaseTags":[],"showImmutableActionsMigrationBanner":false,"initialMigrationStatus":null}},"codeButton":{"contactPath":"/contact","isEnterprise":false,"local":{"protocolInfo":{"httpAvailable":true,"sshAvailable":null,"httpUrl":"https://github.com/NVIDIA/cutlass.git","showCloneWarning":null,"sshUrl":null,"sshCertificatesRequired":null,"sshCertificatesAvailable":null,"ghCliUrl":"gh repo clone NVIDIA/cutlass","defaultProtocol":"http","newSshKeyUrl":"/settings/ssh/new","setProtocolPath":"/users/set_protocol"},"platformInfo":{"cloneUrl":"https://desktop.github.com","showVisualStudioCloneButton":false,"visualStudioCloneUrl":"https://windows.github.com","showXcodeCloneButton":false,"xcodeCloneUrl":"xcode://clone?repo=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass","zipballUrl":"/NVIDIA/cutlass/archive/refs/heads/main.zip"}},"newCodespacePath":"/codespaces/new?hide_repo_select=true\u0026repo=112542515"},"popovers":{"rename":null,"renamedParentRepo":null},"commitCount":"601","overviewFiles":[{"displayName":"README.md","repoName":"cutlass","refName":"main","path":"README.md","preferredFileType":"readme","tabName":"README","richText":"\u003carticle class=\"markdown-body entry-content container-lg\" itemprop=\"text\"\u003e\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer\" href=\"/NVIDIA/cutlass/blob/main/media/images/gemm-hierarchy-with-epilogue-no-labels.png\"\u003e\u003cimg src=\"/NVIDIA/cutlass/raw/main/media/images/gemm-hierarchy-with-epilogue-no-labels.png\" alt=\"ALT\" title=\"Complete CUDA GEMM decomposition\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCUTLASS 3.8.0\u003c/h1\u003e\u003ca id=\"user-content-cutlass-380\" class=\"anchor\" aria-label=\"Permalink: CUTLASS 3.8.0\" href=\"#cutlass-380\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003cem\u003eCUTLASS 3.8.0 - January 2025\u003c/em\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS is a collection of CUDA C++ template abstractions for implementing\nhigh-performance matrix-matrix multiplication (GEMM) and related computations at all levels\nand scales within CUDA. It incorporates strategies for hierarchical decomposition and\ndata movement similar to those used to implement cuBLAS and cuDNN. CUTLASS decomposes\nthese \"moving parts\" into reusable, modular software components abstracted by C++ template\nclasses. Primitives for different levels of a conceptual parallelization hierarchy\ncan be specialized and tuned via custom tiling sizes, data types,\nand other algorithmic policy. The resulting flexibility simplifies their use\nas building blocks within custom kernels and applications.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTo support a wide variety of applications, CUTLASS provides extensive support for\nmixed-precision computations, providing specialized data-movement and\nmultiply-accumulate abstractions for FP64, FP32, TF32, FP16, BF16,\n\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/27_ampere_3xtf32_fast_accurate_tensorop_gemm\"\u003eFP32 emulation via tensor core instruction\u003c/a\u003e,\n8b floating point types (e5m2 and e4m3),\nblock scaled data types (NVIDIA NVFP4 and OCP standard MXFP4, MXFP6, MXFP8),\nnarrow integer types (4 and 8b signed and unsigned integers),\nand binary 1b data types (where architectures allow for the\nnative support of such data types).\nCUTLASS demonstrates optimal matrix multiply operations\ntargeting the programmable, high-throughput \u003cem\u003eTensor Cores\u003c/em\u003e implemented by\nNVIDIA's Volta, Turing, Ampere, Ada, Hopper, and Blackwell architectures.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn addition to GEMMs, CUTLASS implements high-performance convolution via\nthe implicit GEMM algorithm. Implicit GEMM is the formulation of a convolution\noperation as a GEMM thereby taking advantage of CUTLASS's modular GEMM pipeline.\nThis allows CUTLASS to build convolutions by reusing highly-optimized GEMM components.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSee the \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md\"\u003eQuick Start Guide\u003c/a\u003e to get started quickly.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSee the \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/functionality.md\"\u003efunctionality docs\u003c/a\u003e for a more comprehensive\nlist of kernel level features, data types, instructions, and minimum supported by CUTLASS on each GPU\narchitecture.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eWhat's New in CUTLASS 3.8\u003c/h1\u003e\u003ca id=\"user-content-whats-new-in-cutlass-38\" class=\"anchor\" aria-label=\"Permalink: What's New in CUTLASS 3.8\" href=\"#whats-new-in-cutlass-38\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS 3.8 is the first release that supports the NVIDIA Blackwell SM100 architecture.\nFor a background on Blackwell's new features, please consult the PTX documentation for CUDA 12.8.\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eSupport for new CuTe building blocks specifically for Blackwell architecture:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/atom/mma_traits_sm100.hpp\"\u003e5th generation Blackwell Tensor Core instructions (TCGen05)\u003c/a\u003e via CuTe MMA atoms.\u003c/li\u003e\n\u003cli\u003eExtensions to \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100_tma.hpp\"\u003eTensor Memory Accelerator\u003c/a\u003e via CuTe Copy atoms.\u003c/li\u003e\n\u003cli\u003eExposure of Blackwell's new tensor memory (note: distinct from TMA) as \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/pointer.hpp#L290\"\u003e\u003ccode\u003etmem\u003c/code\u003e\u003c/a\u003e across CuTe as a first class data locale.\u003c/li\u003e\n\u003cli\u003eExposure of \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp\"\u003e\u003ccode\u003etmem-\u0026gt;rmem\u003c/code\u003e, \u003ccode\u003ermem-\u0026gt;tmem\u003c/code\u003e and \u003ccode\u003esmem-\u0026gt;tmem data movement instructions\u003c/code\u003e\u003c/a\u003e as copy atoms in CuTe.\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp\"\u003e\u003ccode\u003emake_tmem_copy()\u003c/code\u003e\u003c/a\u003e utility method to ease creation of tiled copies for tmem copy atoms.\u003c/li\u003e\n\u003cli\u003eSupport for \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp\"\u003enew variants of LDSM on Blackwell\u003c/a\u003e via CuTe Copy atoms.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eSupport for new CUTLASS building blocks specifically for Blackwell architecture:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eVarious narrow precision \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/exmy_base.h\"\u003eFP4, FP6, and FP8\u003c/a\u003e formats as well as their \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/float_subbyte.h\"\u003eblock-scaled variants NVFP4, MXFP4, MXFP6, and MXFP8\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/pipeline/sm100_pipeline.hpp\"\u003ePipelines that implement Blackwell specific synchronization\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/cluster_launch.hpp\"\u003eCluster launch control API supporting preferred and fallback cluster shapes\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eData types including NVFP4, MXFP4, MXFP6, and MXFP8 and all their supported element and scale factor types.\u003c/li\u003e\n\u003cli\u003eTile schedulers using \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/blackwell_cluster_launch_control.md\"\u003eBlackwell's Cluster Launch Control (CLC) feature\u003c/a\u003e to implement dynamic persistence scheduling for \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_tile_scheduler.hpp\"\u003eGEMMs\u003c/a\u003e, and \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_tile_scheduler_stream_k.hpp\"\u003estream-K\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eExtensions to testbeds and reference check code for unit tests and CUTLASS profiler.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eFull support for Blackwell kernels in CUTLASS 3.x API:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_gemm_tma_warpspecialized.hpp\"\u003eBlackwell specific kernel layers\u003c/a\u003e that\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eImplement a new warp-specialization recipe tuned specifically for Blackwell.\u003c/li\u003e\n\u003cli\u003eLeverage all the new features such as CLC based tile scheduling, preferred cluster, and TMEM based double buffering of accumulators.\u003c/li\u003e\n\u003cli\u003eSupport stream-K load balancing for all kernel types everywhere via composable scheduler support.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eBlackwell collective mainloops that target the TCGen05 MMA instructions (both SS and TS) for\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_mma_warpspecialized.hpp\"\u003eNon-block scaled data types without support for pointer array and grouped GEMM with TMA\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_mma_array_warpspecialized.hpp\"\u003eNon-block scaled data types with support for pointer array and grouped GEMM with TMA\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_warpspecialized.hpp\"\u003eBlock scaled data types without support for pointer array and grouped GEMM with TMA\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_array_warpspecialized.hpp\"\u003eBlock scaled data types with support for pointer array and grouped GEMM with TMA\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eBlackwell \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/conv/collective/sm100_implicit_gemm_umma_warpspecialized.hpp\"\u003ecollective mainloop for convolution kernels\u003c/a\u003e supporting non-block scaled data types for fprop, dgrad, and wgrad.\u003c/li\u003e\n\u003cli\u003eNew \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/gemm/dispatch_policy.hpp\"\u003eGEMM\u003c/a\u003e, \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/conv/dispatch_policy.hpp\"\u003econvolution\u003c/a\u003e, and \u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/epilogue/dispatch_policy.hpp\"\u003eepilogue\u003c/a\u003e dispatch policies for collectives, kernel layers, and builders.\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/include/cutlass/epilogue/collective/sm100_epilogue_tma_warpspecialized.hpp\"\u003eBlackwell epilogue that supports loading accumulators from \u003ccode\u003etmem\u003c/code\u003e\u003c/a\u003e and \u003ca href=\"/NVIDIA/cutlass/blob/main\"\u003efull set of EVT fusions\u003c/a\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eCUTLASS library and profiler integration for block scaled data types for kernel emission, profiling, and verification.\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eSupport for preferred and fallback cluster shapes via profiler command line arguments parsing to set dynamic cluster shapes.\u003c/li\u003e\n\u003cli\u003eSupport for dynamic datatypes by parsing profiler via profiler command line arguments parsing to set dynamic datatype setting in TCGen05 MMA instruction descriptors.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eSet of examples that demonstrate the usage of the 3.x API for targeting Blackwell\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/70_blackwell_gemm\"\u003eBasic FP16 and FP8 GEMMs with minimal changes from Hopper examples\u003c/a\u003e, demonstrating ease of migration for off the shelf kernels using the 3.x collective builder API.\u003c/li\u003e\n\u003cli\u003eGEMM with \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/71_blackwell_gemm_with_collective_builder/71_blackwell_gemm_with_collective_builder.cu\"\u003eopt-in collective builder schedules showcasing available recipes\u003c/a\u003e for Blackwell.\u003c/li\u003e\n\u003cli\u003eBlock scaled data type GEMMs targeting Blackwell's native block scaled Tensor Cores:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72a_blackwell_nvfp4_bf16_gemm.cu\"\u003eNVFP4 inputs with BF16 output\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72b_blackwell_nvfp4_nvfp4_gemm.cu\"\u003eNVFP4 inputs with NVFP4 output\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72c_blackwell_mixed_mxfp8_bf16_gemm.cu\"\u003eMixed MXFP8 and MXFP6 inputs with BF16 output\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eGEMM example demonstrating \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/73_blackwell_gemm_preferred_cluster/blackwell_gemm_preferred_cluster.cu\"\u003eBlackwell's new preferred cluster support via dynamic cluster shapes\u003c/a\u003e for increased occupancy.\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/74_blackwell_gemm_streamk/blackwell_gemm_streamk.cu\"\u003eGEMM with CLC based StreamK scheduler for load balancing\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eGrouped GEMM for \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm.cu\"\u003evanilla FP8 data inputs\u003c/a\u003e and \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm_block_scaled.cu\"\u003eNVFP4 block scaled inputs\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eConvolution kernels for \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_fprop.cu\"\u003efprop\u003c/a\u003e, \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_dgrad.cu\"\u003edgrad\u003c/a\u003e, and \u003ca href=\"/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_wgrad.cu\"\u003ewgrad\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples/77_blackwell_fmha/77_blackwell_fmha.cu\"\u003eFused multi-head attention fprop kernel\u003c/a\u003e supporting fp16/bf16/fp8 data types across head dims of 32,64, and 128.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eDocumentation updates:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#instantiating-a-blackwell-gemm-kernel\"\u003eQuickstart - instantiating a Blackwell block-scaled GEMM\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eDetailed \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/blackwell_functionality.md\"\u003eBlackwell block-scaled GEMM functionality documentation\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eA new \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/functionality.md\"\u003efunctionality documentation\u003c/a\u003e specifically for 3.x API comprehensively documenting all supported kernel types, data types, kernel features, minimum CUDA tookit support etc for 3.x supported architectures.\u003c/li\u003e\n\u003cli\u003eUpdates to \u003ca href=\"/NVIDIA/cutlass/blob/main/README.md#compatibility\"\u003ecompatibility\u003c/a\u003e section regarding supported compilers, operating systems, CUDA Toolkits, Hardware Architectures, and \u003ca href=\"/NVIDIA/cutlass/blob/main/README.md#Target-Architecture\"\u003eTarget Architecture\u003c/a\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eNote: CUTLASS 3.x builds are known to be down on Windows platforms for all CUDA toolkits.\nCUTLASS team is working on a fix.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003eSee the \u003ca href=\"/NVIDIA/cutlass/blob/main/CHANGELOG.md\"\u003eCHANGELOG\u003c/a\u003e for details of all past releases and updates.\u003c/strong\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003ePerformance\u003c/h1\u003e\u003ca id=\"user-content-performance\" class=\"anchor\" aria-label=\"Permalink: Performance\" href=\"#performance\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels,\nthey exhibit nearly optimal utilization of peak theoretical throughput. The figure below\nshows CUTLASS 3.8's performance as a % of theoretical peak utilization\non various input and output data types when run on NVIDIA Blackwell SM100 architecture GPU.\u003c/p\u003e\n\u003cp align=\"center\" dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer\" href=\"/NVIDIA/cutlass/blob/main/media/images/cutlass-3.8-blackwell-gemm-peak-performance.svg\"\u003e\u003cimg src=\"/NVIDIA/cutlass/raw/main/media/images/cutlass-3.8-blackwell-gemm-peak-performance.svg\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe two figures below show the continual CUTLASS performance improvements\non an \u003ca href=\"https://www.nvidia.com/en-us/data-center/h100/\" rel=\"nofollow\"\u003eNVIDIA H100\u003c/a\u003e (NVIDIA Hopper architecture) since\nCUTLASS 3.1.\nCUTLASS 3.5.1 was compiled with the \u003ca href=\"https://developer.nvidia.com/cuda-downloads\" rel=\"nofollow\"\u003eCUDA 12.5u1 Toolkit\u003c/a\u003e.\nTensor Core operations are implemented using CUDA's\n\u003ca href=\"https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma\" rel=\"nofollow\"\u003emma\u003c/a\u003e and\n\u003ca href=\"https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions\" rel=\"nofollow\"\u003ewgmma\u003c/a\u003e instructions.\u003c/p\u003e\n\u003cp align=\"center\" dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer\" href=\"/NVIDIA/cutlass/blob/main/media/images/cutlass-3.5.1-gemm-peak-performance.png\"\u003e\u003cimg src=\"/NVIDIA/cutlass/raw/main/media/images/cutlass-3.5.1-gemm-peak-performance.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp align=\"center\" dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer\" href=\"/NVIDIA/cutlass/blob/main/media/images/cutlass-3.5.1-gemm-peak-performance-fp8.png\"\u003e\u003cimg src=\"/NVIDIA/cutlass/raw/main/media/images/cutlass-3.5.1-gemm-peak-performance-fp8.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCuTe\u003c/h1\u003e\u003ca id=\"user-content-cute\" class=\"anchor\" aria-label=\"Permalink: CuTe\" href=\"#cute\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS 3.0 introduced a new core library, CuTe, to describe and manipulate tensors of threads and data.\nCuTe is a collection of C++ CUDA template abstractions for\ndefining and operating on hierarchically multidimensional layouts of threads and data.\nCuTe provides \u003ccode\u003eLayout\u003c/code\u003e and \u003ccode\u003eTensor\u003c/code\u003e objects that compactly package the type,\nshape, memory space, and layout of data, while performing the complicated indexing for the user.\nThis lets programmers focus on the logical descriptions of their algorithms while\nCuTe does the mechanical bookkeeping for them. With these tools, we can quickly design,\nimplement, and modify all dense linear algebra operations.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe core abstractions of CuTe are hierarchically multidimensional layouts\nwhich can be composed with data arrays to represent tensors.\nThe representation of layouts is powerful enough to represent nearly\neverything we need to implement efficient dense linear algebra.\nLayouts can also be combined and manipulated via functional composition, on which we build a large set of common operations such as tiling and partitioning.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS 3.0 and beyond adopts CuTe throughout the GEMM hierarchy in its templates.\nThis greatly simplifies the design and improves code composability and readability.\nMore documentation specific to CuTe can be found in its\n\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/cute/00_quickstart.md\"\u003ededicated documentation directory\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCompatibility\u003c/h1\u003e\u003ca id=\"user-content-compatibility\" class=\"anchor\" aria-label=\"Permalink: Compatibility\" href=\"#compatibility\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eMinimum requirements:\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eArchitecture: Volta (compute capability 7.0)\u003c/li\u003e\n\u003cli\u003eCompiler: Must support at least C++17\u003c/li\u003e\n\u003cli\u003eCUDA Toolkit version: 11.4\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS requires a C++17 host compiler and\nperforms best when built with the \u003ca href=\"https://developer.nvidia.com/cuda-downloads\" rel=\"nofollow\"\u003e\u003cstrong\u003eCUDA 12.8 Toolkit\u003c/strong\u003e\u003c/a\u003e.\nIt is also compatible with CUDA 11.4, CUDA 11.5, CUDA 11.6, CUDA 11.7, CUDA 11.8, and all other CUDA 12.x versions.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eOperating Systems\u003c/h2\u003e\u003ca id=\"user-content-operating-systems\" class=\"anchor\" aria-label=\"Permalink: Operating Systems\" href=\"#operating-systems\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe have tested the following environments.\u003c/p\u003e\n\u003cmarkdown-accessiblity-table\u003e\u003ctable\u003e\n\u003cthead\u003e\n\u003ctr\u003e\n\u003cth\u003e\u003cstrong\u003eOperating System\u003c/strong\u003e\u003c/th\u003e\n\u003cth\u003e\u003cstrong\u003eCompiler\u003c/strong\u003e\u003c/th\u003e\n\u003c/tr\u003e\n\u003c/thead\u003e\n\u003ctbody\u003e\n\u003ctr\u003e\n\u003ctd\u003eUbuntu 18.04\u003c/td\u003e\n\u003ctd\u003eGCC 7.5.0\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eUbuntu 20.04\u003c/td\u003e\n\u003ctd\u003eGCC 10.3.0\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eUbuntu 22.04\u003c/td\u003e\n\u003ctd\u003eGCC 11.2.0\u003c/td\u003e\n\u003c/tr\u003e\n\u003c/tbody\u003e\n\u003c/table\u003e\u003c/markdown-accessiblity-table\u003e\n\u003cp dir=\"auto\"\u003eNote: GCC 8.5.0 has known regressions regarding fold expressions and overloaded operators. Using GCC 7.5.0 or (preferred) GCC \u0026gt;= 9 is recommended.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote: CUTLASS 3.x builds are known to be down on Windows platforms for all CUDA toolkits.\nCUTLASS team is working on a fix.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eHardware\u003c/h2\u003e\u003ca id=\"user-content-hardware\" class=\"anchor\" aria-label=\"Permalink: Hardware\" href=\"#hardware\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS runs successfully on the following NVIDIA GPUs, and it is expected to be efficient on Volta, Turing, Ampere, Ada, and Hopper architecture based NVIDIA GPUs.\u003c/p\u003e\n\u003cmarkdown-accessiblity-table\u003e\u003ctable\u003e\n\u003cthead\u003e\n\u003ctr\u003e\n\u003cth\u003e\u003cstrong\u003eGPU\u003c/strong\u003e\u003c/th\u003e\n\u003cth\u003e\u003cstrong\u003eCUDA Compute Capability\u003c/strong\u003e\u003c/th\u003e\n\u003cth\u003e\u003cstrong\u003eMinimum CUDA Toolkit Required by CUTLASS-3\u003c/strong\u003e\u003c/th\u003e\n\u003c/tr\u003e\n\u003c/thead\u003e\n\u003ctbody\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA V100 Tensor Core GPU\u003c/td\u003e\n\u003ctd\u003e7.0\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA TitanV\u003c/td\u003e\n\u003ctd\u003e7.0\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA GeForce RTX 20x0 series\u003c/td\u003e\n\u003ctd\u003e7.5\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA T4\u003c/td\u003e\n\u003ctd\u003e7.5\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA A100 Tensor Core GPU\u003c/td\u003e\n\u003ctd\u003e8.0\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA A10\u003c/td\u003e\n\u003ctd\u003e8.6\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA GeForce RTX 30x0 series\u003c/td\u003e\n\u003ctd\u003e8.6\u003c/td\u003e\n\u003ctd\u003e11.4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA GeForce RTX 40x0 series\u003c/td\u003e\n\u003ctd\u003e8.9\u003c/td\u003e\n\u003ctd\u003e11.8\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA L40\u003c/td\u003e\n\u003ctd\u003e8.9\u003c/td\u003e\n\u003ctd\u003e11.8\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA H100 Tensor Core GPU\u003c/td\u003e\n\u003ctd\u003e9.0\u003c/td\u003e\n\u003ctd\u003e11.8\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA H200 Tensor Core GPU\u003c/td\u003e\n\u003ctd\u003e9.0\u003c/td\u003e\n\u003ctd\u003e11.8\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003eNVIDIA B200 Tensor Core GPU\u003c/td\u003e\n\u003ctd\u003e10.0\u003c/td\u003e\n\u003ctd\u003e12.8\u003c/td\u003e\n\u003c/tr\u003e\n\u003c/tbody\u003e\n\u003c/table\u003e\u003c/markdown-accessiblity-table\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eTarget Architecture\u003c/h2\u003e\u003ca id=\"user-content-target-architecture\" class=\"anchor\" aria-label=\"Permalink: Target Architecture\" href=\"#target-architecture\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn general, PTX code generated for one target architecture can be run on future architectures\n(i.e., it is forward compatible).\nHowever, CUDA 12.0 introduced the concept of \"architecture-accelerated features\" whose\nPTX does not have forward compatibility guarantees.\nSeveral Hopper and Blackwell PTX instructions fall under this category of\narchitecture-accelerated features, and thus require a \u003ccode\u003esm_90a\u003c/code\u003e or \u003ccode\u003esm100a\u003c/code\u003e target architecture\n(note the \"a\" appended). For more details on this and other architecture-accelerated instructions,\nplease refer to the \u003ca href=\"https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#feature-availability\" rel=\"nofollow\"\u003eCUDA Documentation\u003c/a\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe target architecture information is passed on to CUTLASS via the cmake flag\n\u003ccode\u003eCUTLASS_NVCC_ARCHS\u003c/code\u003e. In order to maximize performance on Hopper GH100,\nusers are required to build CUTLASS with \u003ccode\u003e90a\u003c/code\u003e as the target architecture.\nIf a user accidentally builds a kernel which uses SM90a features\n(e.g. Hopper Tensor Core Instructions), using the SM90 target\n(note the lack of \"a\"), with either CUDA Toolkit 12 or 11.8,\nthe kernel is expected to fail with a runtime error.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"cmake .. -DCUTLASS_NVCC_ARCHS=\u0026quot;90a\u0026quot;\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003ecmake .. -DCUTLASS_NVCC_ARCHS=\"90a\"\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eOr\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"cmake .. -DCUTLASS_NVCC_ARCHS=\u0026quot;100a\u0026quot; \"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003ecmake .. -DCUTLASS_NVCC_ARCHS=\"100a\" \n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNote: The NVIDIA Blackwell SM100 architecture used in the datacenter\nproducts has a different compute capability than the one underpinning\nNVIDIA Blackwell GeForce RTX 50 series GPUs. As a result, kernels\ncompiled for Blackwell SM100 architecture with arch conditional features\n(using \u003ccode\u003esm100a\u003c/code\u003e) are not compatible with RTX 50 series GPUs.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003ePlease refer to the \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/functionality.md\"\u003efunctionality documentation\u003c/a\u003e\nfor details on which kernels require which target architectures.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eDocumentation\u003c/h1\u003e\u003ca id=\"user-content-documentation\" class=\"anchor\" aria-label=\"Permalink: Documentation\" href=\"#documentation\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS is described in the following documents and the accompanying\n\u003ca href=\"https://nvidia.github.io/cutlass\" rel=\"nofollow\"\u003eDoxygen documentation\u003c/a\u003e.\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md\"\u003eQuick Start Guide\u003c/a\u003e - basics of building and running CUTLASS\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/functionality.md\"\u003eFunctionality\u003c/a\u003e - summarizes functionality available in CUTLASS\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/efficient_gemm.md\"\u003eEfficient GEMM in CUDA\u003c/a\u003e - describes how GEMM kernels may be implemented efficiently in CUDA\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/cutlass_3x_design.md\"\u003eCUTLASS 3.x Design\u003c/a\u003e - describes the CUTLASS 3.x design, its benefits, and how CuTe enables us to write much more composable components\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/gemm_api_3x.md\"\u003eGEMM API 3.x\u003c/a\u003e - describes the CUTLASS 3.x GEMM model and C++ template concepts\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/gemm_api.md\"\u003eGEMM API 2.x\u003c/a\u003e - describes the CUTLASS 2.x GEMM model and C++ template concepts\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/implicit_gemm_convolution.md\"\u003eImplicit GEMM Convolution\u003c/a\u003e - describes 2-D and 3-D convolution in CUTLASS\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/code_organization.md\"\u003eCode Organization\u003c/a\u003e - describes the organization and contents of the CUTLASS project\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/terminology.md\"\u003eTerminology\u003c/a\u003e - describes terms used in the code\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/programming_guidelines.md\"\u003eProgramming Guidelines\u003c/a\u003e - guidelines for writing efficient modern CUDA C++\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/fundamental_types.md\"\u003eFundamental types\u003c/a\u003e - describes basic C++ classes used in CUTLASS to represent numeric quantities and arrays\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/layout.md\"\u003eLayouts\u003c/a\u003e - describes layouts of matrices and tensors in memory\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/tile_iterator_concept.md\"\u003eTile Iterators\u003c/a\u003e - describes C++ concepts for iterating over tiles of matrices in memory\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/profiler.md\"\u003eCUTLASS Profiler\u003c/a\u003e - command-line driven profiling application\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/utilities.md\"\u003eCUTLASS Utilities\u003c/a\u003e - additional templates used to facilitate rapid development\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/dependent_kernel_launch.md\"\u003eDependent kernel launch\u003c/a\u003e - describes a new feature in Hopper which allows overlapping dependent\nkernels in the same stream, and how it is used in CUTLASS.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eResources\u003c/h1\u003e\u003ca id=\"user-content-resources\" class=\"anchor\" aria-label=\"Permalink: Resources\" href=\"#resources\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe have also described the structure of an efficient GEMM in our talk at the\n\u003ca href=\"http://on-demand.gputechconf.com/gtc/2018/presentation/s8854-cutlass-software-primitives-for-dense-linear-algebra-at-all-levels-and-scales-within-cuda.pdf\" rel=\"nofollow\"\u003eGPU Technology Conference 2018\u003c/a\u003e.\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"https://www.nvidia.com/en-us/on-demand/session/gtcsiliconvalley2018-s8854/\" rel=\"nofollow\"\u003eCUTLASS: Software Primitives for Dense Linear Algebra at All Levels and Scales within CUDA\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21745/\" rel=\"nofollow\"\u003eDeveloping CUDA Kernels to Push Tensor Cores to the Absolute Limit on NVIDIA A100\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s31883/\" rel=\"nofollow\"\u003eAccelerating Convolution with Tensor Cores in CUTLASS\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.nvidia.com/en-us/on-demand/session/gtcspring22-s41996/\" rel=\"nofollow\"\u003eAccelerating Backward Data Gradient by Increasing Tensor Core Utilization in CUTLASS\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.nvidia.com/en-us/on-demand/session/gtcfall22-a41131/\" rel=\"nofollow\"\u003eCUTLASS: Python API, Enhancements, and NVIDIA Hopper\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding CUTLASS\u003c/h1\u003e\u003ca id=\"user-content-building-cutlass\" class=\"anchor\" aria-label=\"Permalink: Building CUTLASS\" href=\"#building-cutlass\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS is a header-only template library and does not need to be built to be used by other\nprojects. Client applications should target CUTLASS's \u003ccode\u003einclude/\u003c/code\u003e directory in their include\npaths.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS unit tests, examples, and utilities can be build with CMake.\nThe minimum version of CMake is given in the \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md\"\u003eQuickstart guide\u003c/a\u003e.\nMake sure the \u003ccode\u003eCUDACXX\u003c/code\u003e environment variable points to NVCC in the CUDA Toolkit installed\non your system.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ export CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc\"\u003e\u003cpre\u003e$ \u003cspan class=\"pl-k\"\u003eexport\u003c/span\u003e CUDACXX=\u003cspan class=\"pl-smi\"\u003e${CUDA_INSTALL_PATH}\u003c/span\u003e/bin/nvcc\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCreate a build directory within the CUTLASS project, then run CMake. By default CUTLASS will build kernels\nfor CUDA architecture versions 5.0, 6.0, 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, and 9.0.\nTo reduce compile time you can specify\nthe architectures to build CUTLASS for by changing the CMake configuration setting\n\u003ccode\u003eCUTLASS_NVCC_ARCHS\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ mkdir build \u0026amp;\u0026amp; cd build\n\n$ cmake .. -DCUTLASS_NVCC_ARCHS=80 # compiles for NVIDIA's Ampere Architecture\"\u003e\u003cpre\u003e$ mkdir build \u003cspan class=\"pl-k\"\u003e\u0026amp;\u0026amp;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ecd\u003c/span\u003e build\n\n$ cmake .. -DCUTLASS_NVCC_ARCHS=80 \u003cspan class=\"pl-c\"\u003e\u003cspan class=\"pl-c\"\u003e#\u003c/span\u003e compiles for NVIDIA's Ampere Architecture\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFrom the \u003ccode\u003ebuild/\u003c/code\u003e directory, compile and run the CUTLASS unit tests by building the target \u003ccode\u003etest_unit\u003c/code\u003e with make.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe unit tests are organized as several binaries mirroring the top-level namespaces of CUTLASS,\nand they may be executed in parallel via make's \u003ccode\u003e-j\u003c/code\u003e command line argument.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ make test_unit -j\n...\n...\n...\n[----------] Global test environment tear-down\n[==========] 946 tests from 57 test cases ran. (10812 ms total)\n[ PASSED ] 946 tests.\"\u003e\u003cpre\u003e$ make test_unit -j\n...\n...\n...\n[----------] Global \u003cspan class=\"pl-c1\"\u003etest\u003c/span\u003e environment tear-down\n[\u003cspan class=\"pl-k\"\u003e==========\u003c/span\u003e] 946 tests from 57 \u003cspan class=\"pl-c1\"\u003etest\u003c/span\u003e cases ran. (10812 ms total)\n[ PASSED ] 946 tests.\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAll tests should pass on supported platforms, though the exact number of tests may vary over time.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eProject Structure\u003c/h1\u003e\u003ca id=\"user-content-project-structure\" class=\"anchor\" aria-label=\"Permalink: Project Structure\" href=\"#project-structure\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS is arranged as a header-only library along with Utilities, Tools, Examples, and unit tests.\n\u003ca href=\"https://nvidia.github.io/cutlass\" rel=\"nofollow\"\u003eDoxygen documentation\u003c/a\u003e provides a complete list of files, classes,\nand template concepts defined in the CUTLASS project.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eA detailed explanation of the source code organization may be found in the\n\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/code_organization.md\"\u003eCUTLASS documentation\u003c/a\u003e, but several main components are summarized below.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCUTLASS Template Library\u003c/h2\u003e\u003ca id=\"user-content-cutlass-template-library\" class=\"anchor\" aria-label=\"Permalink: CUTLASS Template Library\" href=\"#cutlass-template-library\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"include/ # client applications should target this directory in their build's include paths\n\n cutlass/ # CUDA Templates for Linear Algebra Subroutines and Solvers - headers only\n\n arch/ # direct exposure of architecture features (including instruction-level GEMMs)\n\n conv/ # code specialized for convolution\n\n epilogue/ # code specialized for the epilogue of gemm/convolution\n\n gemm/ # code specialized for general matrix product computations\n\n layout/ # layout definitions for matrices, tensors, and other mathematical objects in memory\n\n platform/ # CUDA-capable Standard Library components\n\n reduction/ # bandwidth-limited reduction kernels that do not fit the \u0026quot;gemm\u0026quot; model\n\n thread/ # simt code that can be performed within a CUDA thread\n \n transform/ # code specialized for layout, type, and domain transformations\n\n * # core vocabulary types, containers, and basic numeric operations\n\n cute/ # CuTe Layout, layout algebra, MMA/Copy atoms, tiled MMA/Copy\n\n algorithm/ # Definitions of core operations such as copy, gemm, and operations on cute::tuples\n\n arch/ # Bare bones PTX wrapper structs for copy and math instructions\n\n atom/ # Meta-information either link to or built from arch/ operators\n\n mma_atom.hpp # cute::Mma_Atom and cute::TiledMma\n\n copy_atom.hpp # cute::Copy_Atom and cute::TiledCopy\n\n *sm*.hpp # Arch specific meta-information for copy and math operations\n\n * # Core library types such as Shape, Stride, Layout, Tensor, and associated operations\n\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003einclude/ # client applications should target this directory in their build's include paths\n\n cutlass/ # CUDA Templates for Linear Algebra Subroutines and Solvers - headers only\n\n arch/ # direct exposure of architecture features (including instruction-level GEMMs)\n\n conv/ # code specialized for convolution\n\n epilogue/ # code specialized for the epilogue of gemm/convolution\n\n gemm/ # code specialized for general matrix product computations\n\n layout/ # layout definitions for matrices, tensors, and other mathematical objects in memory\n\n platform/ # CUDA-capable Standard Library components\n\n reduction/ # bandwidth-limited reduction kernels that do not fit the \"gemm\" model\n\n thread/ # simt code that can be performed within a CUDA thread\n \n transform/ # code specialized for layout, type, and domain transformations\n\n * # core vocabulary types, containers, and basic numeric operations\n\n cute/ # CuTe Layout, layout algebra, MMA/Copy atoms, tiled MMA/Copy\n\n algorithm/ # Definitions of core operations such as copy, gemm, and operations on cute::tuples\n\n arch/ # Bare bones PTX wrapper structs for copy and math instructions\n\n atom/ # Meta-information either link to or built from arch/ operators\n\n mma_atom.hpp # cute::Mma_Atom and cute::TiledMma\n\n copy_atom.hpp # cute::Copy_Atom and cute::TiledCopy\n\n *sm*.hpp # Arch specific meta-information for copy and math operations\n\n * # Core library types such as Shape, Stride, Layout, Tensor, and associated operations\n\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCUTLASS SDK Examples\u003c/h3\u003e\u003ca id=\"user-content-cutlass-sdk-examples\" class=\"anchor\" aria-label=\"Permalink: CUTLASS SDK Examples\" href=\"#cutlass-sdk-examples\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/examples\"\u003eCUTLASS SDK examples\u003c/a\u003e apply CUTLASS templates to implement basic computations.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eTools\u003c/h3\u003e\u003ca id=\"user-content-tools\" class=\"anchor\" aria-label=\"Permalink: Tools\" href=\"#tools\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"tools/\n library/ # CUTLASS Instance Library - contains instantiations of all supported CUTLASS templates\n include/\n cutlass/\n library/\n\n profiler/ # CUTLASS Profiler - command-line utility for executing operations in the\n # CUTLASS Library\n \n util/ # CUTLASS Utilities - contains numerous helper classes for\n include/ # manging tensors in device memory, reference\n cutlass/ # implementations for GEMM, random initialization\n util/ # of tensors, and I/O.\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003etools/\n library/ # CUTLASS Instance Library - contains instantiations of all supported CUTLASS templates\n include/\n cutlass/\n library/\n\n profiler/ # CUTLASS Profiler - command-line utility for executing operations in the\n # CUTLASS Library\n \n util/ # CUTLASS Utilities - contains numerous helper classes for\n include/ # manging tensors in device memory, reference\n cutlass/ # implementations for GEMM, random initialization\n util/ # of tensors, and I/O.\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eTest\u003c/h3\u003e\u003ca id=\"user-content-test\" class=\"anchor\" aria-label=\"Permalink: Test\" href=\"#test\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe \u003ccode\u003etest/unit/\u003c/code\u003e directory consist of unit tests implemented with Google Test that demonstrate\nbasic usage of Core API components and complete tests of the CUTLASS GEMM computations.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eInstructions for building and running the Unit tests are described in the \u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md\"\u003eQuickstart guide\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003ePerformance Profiling\u003c/h1\u003e\u003ca id=\"user-content-performance-profiling\" class=\"anchor\" aria-label=\"Permalink: Performance Profiling\" href=\"#performance-profiling\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe \u003ccode\u003etools/profiler/\u003c/code\u003e directory contains a command-line utility for launching each of the GEMM kernels.\nIt can be built as follows:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding all GEMM and Convolution kernels (\u003cem\u003elong\u003c/em\u003e build times)\u003c/h2\u003e\u003ca id=\"user-content-building-all-gemm-and-convolution-kernels-long-build-times\" class=\"anchor\" aria-label=\"Permalink: Building all GEMM and Convolution kernels (long build times)\" href=\"#building-all-gemm-and-convolution-kernels-long-build-times\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBy default, only one tile size is instantiated for each data type, math instruction, and layout.\nTo instantiate all, set the following environment variable when running CMake from an empty \u003ccode\u003ebuild/\u003c/code\u003e directory.\nBeware, this results in \u003cem\u003etens of thousands\u003c/em\u003e of kernels and long build times.\nThis would also result in a large binary size and on some platforms linker to fail on building the library.\nTherefore, it's highly recommended to generate only a subset of kernels as demonstrated in the sub-section below.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ cmake .. -DCUTLASS_NVCC_ARCHS=90a -DCUTLASS_LIBRARY_KERNELS=all\n...\n$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ cmake .. -DCUTLASS_NVCC_ARCHS=90a -DCUTLASS_LIBRARY_KERNELS=all\n...\n$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding a subset of GEMM and Convolution kernels (\u003cem\u003ereduced\u003c/em\u003e build times)\u003c/h2\u003e\u003ca id=\"user-content-building-a-subset-of-gemm-and-convolution-kernels-reduced-build-times\" class=\"anchor\" aria-label=\"Permalink: Building a subset of GEMM and Convolution kernels (reduced build times)\" href=\"#building-a-subset-of-gemm-and-convolution-kernels-reduced-build-times\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo compile strictly one kernel or a small set of kernels, a comma-delimited list of kernel names with\nwildcard characters may be used to reduce the set of kernels. The following examples show building exactly one\nor a subset of kernels for NVIDIA Ampere and Turing architecture:\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding a subset Tensor Core GEMM kernels\u003c/h3\u003e\u003ca id=\"user-content-building-a-subset-tensor-core-gemm-kernels\" class=\"anchor\" aria-label=\"Permalink: Building a subset Tensor Core GEMM kernels\" href=\"#building-a-subset-tensor-core-gemm-kernels\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo compile a subset of Tensor Core GEMM kernels with FP32 accumulation and FP16 input targeting NVIDIA Ampere and Turing architecture,\nuse the below cmake command line:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*gemm_f16_*_nt_align8\n...\n$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ cmake .. -DCUTLASS_NVCC_ARCHS=\u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e75;80\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e\u003c/span\u003e -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003egemm_f16_\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003e_nt_align8\n...\n$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eExample command line for profiling a subset of Tensor Core GEMM kernels is as follows:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*gemm_f16_*_nt_align8 --m=3456 --n=4096 --k=4096\n\n...\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: gemm\n Operation: cutlass_tensorop_s1688gemm_f16_256x128_32x2_nt_align8\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n cuBLAS: Passed\n\n Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=4096 --A=f16:column --B=f16:row --C=f32:column --alpha=1 \\\n --beta=0 --split_k_slices=1 --batch_count=1 --op_class=tensorop --accum=f32 --cta_m=256 --cta_n=128 \\\n --cta_k=32 --stages=2 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=8 --min_cc=75 \\\n --max_cc=1024\n\n Bytes: 118489088 bytes\n FLOPs: 115992428544 flops\n\n Runtime: 1.55948 ms\n Memory: 70.7616 GiB/s\n\n Math: 74378.8 GFLOP/s\n\n\n\n=============================\n...\"\u003e\u003cpre\u003e./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003egemm_f16_\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003e_nt_align8 --m=3456 --n=4096 --k=4096\n\n...\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: gemm\n Operation: cutlass_tensorop_s1688gemm_f16_256x128_32x2_nt_align8\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n cuBLAS: Passed\n\n Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=4096 --A=f16:column --B=f16:row --C=f32:column --alpha=1 \\\n --beta=0 --split_k_slices=1 --batch_count=1 --op_class=tensorop --accum=f32 --cta_m=256 --cta_n=128 \\\n --cta_k=32 --stages=2 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=8 --min_cc=75 \\\n --max_cc=1024\n\n Bytes: 118489088 bytes\n FLOPs: 115992428544 flops\n\n Runtime: 1.55948 ms\n Memory: 70.7616 GiB/s\n\n Math: 74378.8 GFLOP/s\n\n\n\n=============================\n...\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding one CUDA Core GEMM kernel\u003c/h3\u003e\u003ca id=\"user-content-building-one-cuda-core-gemm-kernel\" class=\"anchor\" aria-label=\"Permalink: Building one CUDA Core GEMM kernel\" href=\"#building-one-cuda-core-gemm-kernel\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo compile one SGEMM kernel targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sgemm_128x128_8x2_nn_align1\n...\n$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ cmake .. -DCUTLASS_NVCC_ARCHS=\u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e75;80\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e\u003c/span\u003e -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sgemm_128x128_8x2_nn_align1\n...\n$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eExample command line for profiling single SGEMM CUDA kernel is as follows:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ ./tools/profiler/cutlass_profiler --kernels=sgemm --m=3456 --n=4096 --k=4096\n\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: gemm\n Operation: cutlass_simt_sgemm_128x128_8x2_nn_align1\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\n cuBLAS: Passed\n\n Arguments: --m=3456 --n=4096 --k=4096 --A=f32:column --B=f32:column --C=f32:column --alpha=1 --beta=0 --split_k_slices=1 \\\n --batch_count=1 --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \\\n --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024\n\n Bytes: 180355072 bytes\n FLOPs: 115992428544 flops\n\n Runtime: 6.73655 ms\n Memory: 24.934 GiB/s\n\n Math: 17218.4 GFLOP/s\n\n=============================\"\u003e\u003cpre\u003e$ ./tools/profiler/cutlass_profiler --kernels=sgemm --m=3456 --n=4096 --k=4096\n\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: gemm\n Operation: cutlass_simt_sgemm_128x128_8x2_nn_align1\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\n cuBLAS: Passed\n\n Arguments: --m=3456 --n=4096 --k=4096 --A=f32:column --B=f32:column --C=f32:column --alpha=1 --beta=0 --split_k_slices=1 \\\n --batch_count=1 --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \\\n --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024\n\n Bytes: 180355072 bytes\n FLOPs: 115992428544 flops\n\n Runtime: 6.73655 ms\n Memory: 24.934 GiB/s\n\n Math: 17218.4 GFLOP/s\n\n=============================\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding a subset of Tensor Core Convolution kernels\u003c/h3\u003e\u003ca id=\"user-content-building-a-subset-of-tensor-core-convolution-kernels\" class=\"anchor\" aria-label=\"Permalink: Building a subset of Tensor Core Convolution kernels\" href=\"#building-a-subset-of-tensor-core-convolution-kernels\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo compile a subset of Tensor core convolution kernels implementing forward propagation (fprop) with FP32 accumulation\nand FP16 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*fprop_optimized_f16\n...\n$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ cmake .. -DCUTLASS_NVCC_ARCHS=\u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e75;80\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e\u003c/span\u003e -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003efprop_optimized_f16\n...\n$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eExample command line for profiling a subset of Tensor Core convolution kernels is as follows:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ ./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*fprop_optimized_f16 --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3\n\n...\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: conv2d\n Operation: cutlass_tensorop_s16816fprop_optimized_f16_128x128_32x5_nhwc\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n\n Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \\\n --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f16:nhwc --Filter=f16:nhwc --Output=f32:nhwc \\\n --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \\\n --eq_gemm_provider=none --op_class=tensorop --accum=f32 --cta_m=128 --cta_n=128 --cta_k=32 --stages=5 \\\n --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=16 --min_cc=80 --max_cc=1024\n\n Bytes: 1130659840 bytes\n FLOPs: 118482796544 flops\n\n Runtime: 0.711496 ms\n Memory: 1479.99 GiB/s\n\n Math: 166526 GFLOP/s\n\n=============================\n...\"\u003e\u003cpre\u003e$ ./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s\u003cspan class=\"pl-k\"\u003e*\u003c/span\u003efprop_optimized_f16 --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3\n\n...\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: conv2d\n Operation: cutlass_tensorop_s16816fprop_optimized_f16_128x128_32x5_nhwc\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n\n Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \\\n --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f16:nhwc --Filter=f16:nhwc --Output=f32:nhwc \\\n --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \\\n --eq_gemm_provider=none --op_class=tensorop --accum=f32 --cta_m=128 --cta_n=128 --cta_k=32 --stages=5 \\\n --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=16 --min_cc=80 --max_cc=1024\n\n Bytes: 1130659840 bytes\n FLOPs: 118482796544 flops\n\n Runtime: 0.711496 ms\n Memory: 1479.99 GiB/s\n\n Math: 166526 GFLOP/s\n\n=============================\n...\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eBuilding one Convolution CUDA kernel\u003c/h3\u003e\u003ca id=\"user-content-building-one-convolution-cuda-kernel\" class=\"anchor\" aria-label=\"Permalink: Building one Convolution CUDA kernel\" href=\"#building-one-convolution-cuda-kernel\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo compile and run one CUDA Core convolution kernel implementing forward propagation (fprop) with F32 accumulation\nand FP32 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc\n...\n$ make cutlass_profiler -j16\"\u003e\u003cpre\u003e$ cmake .. -DCUTLASS_NVCC_ARCHS=\u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e75;80\u003cspan class=\"pl-pds\"\u003e'\u003c/span\u003e\u003c/span\u003e -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc\n...\n$ make cutlass_profiler -j16\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eExample command line for profiling one CUDA Core convolution kernel:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ ./tools/profiler/cutlass_profiler --kernels=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3\n\n\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: conv2d\n Operation: cutlass_simt_sfprop_optimized_128x128_8x2_nhwc\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n\n Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \\\n --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f32:nhwc --Filter=f32:nhwc --Output=f32:nhwc \\\n --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \\\n --eq_gemm_provider=none --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \\\n --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024\n\n Bytes: 2055798784 bytes\n FLOPs: 118482796544 flops\n\n Runtime: 7.34266 ms\n Memory: 260.752 GiB/s\n\n Math: 16136.2 GFLOP/s\n\n\n=============================\n\"\u003e\u003cpre\u003e$ ./tools/profiler/cutlass_profiler --kernels=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3\n\n\n=============================\n Problem ID: 1\n\n Provider: CUTLASS\n OperationKind: conv2d\n Operation: cutlass_simt_sfprop_optimized_128x128_8x2_nhwc\n\n Status: Success\n Verification: ON\n Disposition: Passed\n\nreference_device: Passed\n\n Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \\\n --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f32:nhwc --Filter=f32:nhwc --Output=f32:nhwc \\\n --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \\\n --eq_gemm_provider=none --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \\\n --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024\n\n Bytes: 2055798784 bytes\n FLOPs: 118482796544 flops\n\n Runtime: 7.34266 ms\n Memory: 260.752 GiB/s\n\n Math: 16136.2 GFLOP/s\n\n\n=============================\n\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eMore Details on Compiling CUTLASS Kernels and CUTLASS Profiler\u003c/h2\u003e\u003ca id=\"user-content-more-details-on-compiling-cutlass-kernels-and-cutlass-profiler\" class=\"anchor\" aria-label=\"Permalink: More Details on Compiling CUTLASS Kernels and CUTLASS Profiler\" href=\"#more-details-on-compiling-cutlass-kernels-and-cutlass-profiler\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003ePlease follow the links for more CMake examples on selectively compiling CUTLASS kernels:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#gemm-cmake-examples\"\u003eGEMM CMake Examples\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#convolution-cmake-examples\"\u003eImplicit GEMM convolution CMake Examples\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"/NVIDIA/cutlass/blob/main/media/docs/profiler.md\"\u003eFurther details about the CUTLASS Profiler are described here.\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAbout\u003c/h1\u003e\u003ca id=\"user-content-about\" class=\"anchor\" aria-label=\"Permalink: About\" href=\"#about\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCUTLASS is released by NVIDIA Corporation as Open Source software under the\n\u003ca href=\"/NVIDIA/cutlass/blob/main/LICENSE.txt\"\u003e3-clause \"New\" BSD license\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eContributors\u003c/h1\u003e\u003ca id=\"user-content-contributors\" class=\"anchor\" aria-label=\"Permalink: Contributors\" href=\"#contributors\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe official list of CUTLASS developers and contributors is available here: \u003ca href=\"/NVIDIA/cutlass/blob/main/CONTRIBUTORS.md\"\u003eCONTRIBUTORS\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCopyright\u003c/h1\u003e\u003ca id=\"user-content-copyright\" class=\"anchor\" aria-label=\"Permalink: Copyright\" href=\"#copyright\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCopyright (c) 2017 - 2025 NVIDIA CORPORATION \u0026amp; AFFILIATES. All rights reserved.\nSPDX-License-Identifier: BSD-3-Clause\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\" Redistribution and use in source and binary forms, with or without\n modification, are permitted provided that the following conditions are met:\n\n 1. Redistributions of source code must retain the above copyright notice, this\n list of conditions and the following disclaimer.\n\n 2. Redistributions in binary form must reproduce the above copyright notice,\n this list of conditions and the following disclaimer in the documentation\n and/or other materials provided with the distribution.\n\n 3. Neither the name of the copyright holder nor the names of its\n contributors may be used to endorse or promote products derived from\n this software without specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \u0026quot;AS IS\u0026quot;\n AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\n FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e Redistribution and use in source and binary forms, with or without\n modification, are permitted provided that the following conditions are met:\n\n 1. Redistributions of source code must retain the above copyright notice, this\n list of conditions and the following disclaimer.\n\n 2. Redistributions in binary form must reproduce the above copyright notice,\n this list of conditions and the following disclaimer in the documentation\n and/or other materials provided with the distribution.\n\n 3. Neither the name of the copyright holder nor the names of its\n contributors may be used to endorse or promote products derived from\n this software without specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\n FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003c/article\u003e","loaded":true,"timedOut":false,"errorMessage":null,"headerInfo":{"toc":[{"level":1,"text":"CUTLASS 3.8.0","anchor":"cutlass-380","htmlText":"CUTLASS 3.8.0"},{"level":1,"text":"What's New in CUTLASS 3.8","anchor":"whats-new-in-cutlass-38","htmlText":"What's New in CUTLASS 3.8"},{"level":1,"text":"Performance","anchor":"performance","htmlText":"Performance"},{"level":1,"text":"CuTe","anchor":"cute","htmlText":"CuTe"},{"level":1,"text":"Compatibility","anchor":"compatibility","htmlText":"Compatibility"},{"level":2,"text":"Operating Systems","anchor":"operating-systems","htmlText":"Operating Systems"},{"level":2,"text":"Hardware","anchor":"hardware","htmlText":"Hardware"},{"level":2,"text":"Target Architecture","anchor":"target-architecture","htmlText":"Target Architecture"},{"level":1,"text":"Documentation","anchor":"documentation","htmlText":"Documentation"},{"level":1,"text":"Resources","anchor":"resources","htmlText":"Resources"},{"level":1,"text":"Building CUTLASS","anchor":"building-cutlass","htmlText":"Building CUTLASS"},{"level":1,"text":"Project Structure","anchor":"project-structure","htmlText":"Project Structure"},{"level":2,"text":"CUTLASS Template Library","anchor":"cutlass-template-library","htmlText":"CUTLASS Template Library"},{"level":3,"text":"CUTLASS SDK Examples","anchor":"cutlass-sdk-examples","htmlText":"CUTLASS SDK Examples"},{"level":3,"text":"Tools","anchor":"tools","htmlText":"Tools"},{"level":3,"text":"Test","anchor":"test","htmlText":"Test"},{"level":1,"text":"Performance Profiling","anchor":"performance-profiling","htmlText":"Performance Profiling"},{"level":2,"text":"Building all GEMM and Convolution kernels (long build times)","anchor":"building-all-gemm-and-convolution-kernels-long-build-times","htmlText":"Building all GEMM and Convolution kernels (long build times)"},{"level":2,"text":"Building a subset of GEMM and Convolution kernels (reduced build times)","anchor":"building-a-subset-of-gemm-and-convolution-kernels-reduced-build-times","htmlText":"Building a subset of GEMM and Convolution kernels (reduced build times)"},{"level":3,"text":"Building a subset Tensor Core GEMM kernels","anchor":"building-a-subset-tensor-core-gemm-kernels","htmlText":"Building a subset Tensor Core GEMM kernels"},{"level":3,"text":"Building one CUDA Core GEMM kernel","anchor":"building-one-cuda-core-gemm-kernel","htmlText":"Building one CUDA Core GEMM kernel"},{"level":3,"text":"Building a subset of Tensor Core Convolution kernels","anchor":"building-a-subset-of-tensor-core-convolution-kernels","htmlText":"Building a subset of Tensor Core Convolution kernels"},{"level":3,"text":"Building one Convolution CUDA kernel","anchor":"building-one-convolution-cuda-kernel","htmlText":"Building one Convolution CUDA kernel"},{"level":2,"text":"More Details on Compiling CUTLASS Kernels and CUTLASS Profiler","anchor":"more-details-on-compiling-cutlass-kernels-and-cutlass-profiler","htmlText":"More Details on Compiling CUTLASS Kernels and CUTLASS Profiler"},{"level":1,"text":"About","anchor":"about","htmlText":"About"},{"level":1,"text":"Contributors","anchor":"contributors","htmlText":"Contributors"},{"level":1,"text":"Copyright","anchor":"copyright","htmlText":"Copyright"}],"siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass"}},{"displayName":"LICENSE.txt","repoName":"cutlass","refName":"main","path":"LICENSE.txt","preferredFileType":"license","tabName":"License","richText":null,"loaded":false,"timedOut":false,"errorMessage":null,"headerInfo":{"toc":null,"siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass"}}],"overviewFilesProcessingTime":0}},"appPayload":{"helpUrl":"https://docs.github.com","findFileWorkerPath":"/assets-cdn/worker/find-file-worker-9f8a877aa99f.js","findInFileWorkerPath":"/assets-cdn/worker/find-in-file-worker-96e76d5fdb2c.js","githubDevUrl":null,"enabled_features":{"copilot_workspace":null,"code_nav_ui_events":false,"overview_shared_code_dropdown_button":false,"react_blob_overlay":false,"copilot_conversational_ux_embedding_update":false,"copilot_smell_icebreaker_ux":true,"accessible_code_button":true}}}}</script> <div data-target="react-partial.reactRoot"><style data-styled="true" data-styled-version="5.3.11">.iVEunk{margin-top:16px;margin-bottom:16px;}/*!sc*/ .jzuOtQ{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between;}/*!sc*/ .bGojzy{margin-bottom:0;display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;row-gap:16px;}/*!sc*/ .iNSVHo{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between;-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;padding-bottom:16px;padding-top:8px;}/*!sc*/ .bVgnfw{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:row;-ms-flex-direction:row;flex-direction:row;gap:8px;}/*!sc*/ @media screen and (max-width:320px){.bVgnfw{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}}/*!sc*/ .CEgMp{position:relative;}/*!sc*/ @media screen and (max-width:380px){.CEgMp .ref-selector-button-text-container{max-width:80px;}}/*!sc*/ @media screen and (max-width:320px){.CEgMp{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}.CEgMp .overview-ref-selector{width:100%;}.CEgMp .overview-ref-selector > span{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-pack:start;-webkit-justify-content:flex-start;-ms-flex-pack:start;justify-content:flex-start;}.CEgMp .overview-ref-selector > span > span[data-component="text"]{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}}/*!sc*/ .gMOVLe[data-size="medium"]{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;min-width:0;}/*!sc*/ .gMOVLe[data-size="medium"] svg{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .gMOVLe[data-size="medium"] > span{width:inherit;}/*!sc*/ .gUkoLg{-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;}/*!sc*/ .bZBlpz{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;width:100%;}/*!sc*/ .lhTYNA{margin-right:4px;color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .ffLUq{font-size:14px;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;}/*!sc*/ .fLXEGX{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (max-width:1079px){.fLXEGX{display:none;}}/*!sc*/ .lmSMZJ[data-size="medium"]{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));padding-left:4px;padding-right:4px;}/*!sc*/ .lmSMZJ[data-size="medium"] span[data-component="leadingVisual"]{margin-right:4px !important;}/*!sc*/ .dqfxud{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:1080px){.dqfxud{display:none;}}/*!sc*/ @media screen and (max-width:543px){.dqfxud{display:none;}}/*!sc*/ .fGwBZA[data-size="medium"][data-no-visuals]{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .jxTzTd{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;padding-left:8px;gap:8px;}/*!sc*/ .gqqBXN{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;gap:8px;}/*!sc*/ @media screen and (max-width:543px){.gqqBXN{display:none;}}/*!sc*/ .dzXgxt{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (max-width:1011px){.dzXgxt{display:none;}}/*!sc*/ .iWFGlI{margin-left:8px;margin-right:8px;margin:0;}/*!sc*/ .YUPas{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:1012px){.YUPas{display:none;}}/*!sc*/ .izFOf{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:544px){.izFOf{display:none;}}/*!sc*/ .vIPPs{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;gap:16px;}/*!sc*/ .fdROMU{width:100%;border-collapse:separate;border-spacing:0;border:1px solid;border-color:var(--borderColor-default,var(--color-border-default,#d0d7de));border-radius:6px;table-layout:fixed;overflow:unset;}/*!sc*/ .jGKpsv{height:0px;line-height:0px;}/*!sc*/ .jGKpsv tr{height:0px;font-size:0px;}/*!sc*/ .jdgHnn{padding:16px;color:var(--fgColor-muted,var(--color-fg-muted,#656d76));font-size:12px;text-align:left;height:40px;}/*!sc*/ .jdgHnn th{padding-left:16px;background-color:var(--bgColor-muted,var(--color-canvas-subtle,#f6f8fa));}/*!sc*/ .bQivRW{width:100%;border-top-left-radius:6px;}/*!sc*/ @media screen and (min-width:544px){.bQivRW{display:none;}}/*!sc*/ .ldkMIO{width:40%;border-top-left-radius:6px;}/*!sc*/ @media screen and (max-width:543px){.ldkMIO{display:none;}}/*!sc*/ .jMbWeI{text-align:right;padding-right:16px;width:136px;border-top-right-radius:6px;}/*!sc*/ .gpqjiB{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));font-size:12px;height:40px;}/*!sc*/ .dzCJzi{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:row;-ms-flex-direction:row;flex-direction:row;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;gap:8px;min-width:273px;padding:8px;}/*!sc*/ @media screen and (min-width:544px){.dzCJzi{-webkit-flex-wrap:nowrap;-ms-flex-wrap:nowrap;flex-wrap:nowrap;}}/*!sc*/ .eNCcrz{text-align:center;vertical-align:center;height:40px;border-top:1px solid;border-color:var(--borderColor-default,var(--color-border-default,#d0d7de));}/*!sc*/ .bHTcCe{border-top:1px solid var(--borderColor-default,var(--color-border-default));cursor:pointer;}/*!sc*/ .csrIcr{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;gap:16px;}/*!sc*/ .bUQNHB{border:1px solid;border-color:var(--borderColor-default,var(--color-border-default,#d0d7de));border-radius:6px;display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}/*!sc*/ @media screen and (max-width:543px){.bUQNHB{margin-left:-16px;margin-right:-16px;max-width:calc(100% + 32px);}}/*!sc*/ @media screen and (min-width:544px){.bUQNHB{max-width:100%;}}/*!sc*/ .jPdcfu{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;border-bottom:1px solid;border-bottom-color:var(--borderColor-default,var(--color-border-default,#d0d7de));-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;padding-right:8px;position:-webkit-sticky;position:sticky;top:0;background-color:var(--bgColor-default,var(--color-canvas-default,#ffffff));z-index:1;border-top-left-radius:6px;border-top-right-radius:6px;}/*!sc*/ .hUCRAk{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;}/*!sc*/ .cwoBXV[data-size="medium"]{color:var(--fgColor-muted,var(--color-fg-subtle,#6e7781));padding-left:8px;padding-right:8px;}/*!sc*/ .QkQOb{padding:32px;overflow:auto;}/*!sc*/ data-styled.g1[id="Box-sc-g0xbh4-0"]{content:"iVEunk,jzuOtQ,bGojzy,iNSVHo,bVgnfw,CEgMp,gMOVLe,gUkoLg,bZBlpz,lhTYNA,ffLUq,fLXEGX,lmSMZJ,dqfxud,fGwBZA,jxTzTd,gqqBXN,dzXgxt,iWFGlI,YUPas,izFOf,vIPPs,fdROMU,jGKpsv,jdgHnn,bQivRW,ldkMIO,jMbWeI,gpqjiB,dzCJzi,eNCcrz,bHTcCe,csrIcr,bUQNHB,jPdcfu,hUCRAk,cwoBXV,QkQOb,"}/*!sc*/ .eMMFM{min-width:0;}/*!sc*/ .eMMFM:where([data-size='small']){font-size:var(--text-body-size-small,0.75rem);line-height:var(--text-body-lineHeight-small,1.6666);}/*!sc*/ .eMMFM:where([data-size='medium']){font-size:var(--text-body-size-medium,0.875rem);line-height:var(--text-body-lineHeight-medium,1.4285);}/*!sc*/ .eMMFM:where([data-size='large']){font-size:var(--text-body-size-large,1rem);line-height:var(--text-body-lineHeight-large,1.5);}/*!sc*/ .eMMFM:where([data-weight='light']){font-weight:var(--base-text-weight-light,300);}/*!sc*/ .eMMFM:where([data-weight='normal']){font-weight:var(--base-text-weight-normal,400);}/*!sc*/ .eMMFM:where([data-weight='medium']){font-weight:var(--base-text-weight-medium,500);}/*!sc*/ .eMMFM:where([data-weight='semibold']){font-weight:var(--base-text-weight-semibold,600);}/*!sc*/ data-styled.g3[id="Text__StyledText-sc-17v1xeu-0"]{content:"eMMFM,"}/*!sc*/ .brGdpi{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;-webkit-clip:rect(0,0,0,0);clip:rect(0,0,0,0);white-space:nowrap;border-width:0;}/*!sc*/ data-styled.g4[id="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0"]{content:"brGdpi,"}/*!sc*/ .jkNcAv{border:0;font-size:inherit;font-family:inherit;background-color:transparent;-webkit-appearance:none;color:inherit;width:100%;}/*!sc*/ .jkNcAv:focus{outline:0;}/*!sc*/ data-styled.g13[id="UnstyledTextInput__ToggledUnstyledTextInput-sc-14ypya-0"]{content:"jkNcAv,"}/*!sc*/ .hLzFvi{font-size:14px;line-height:var(--base-size-20);color:var(--fgColor-default,var(--color-fg-default,#1F2328));vertical-align:middle;background-color:var(--bgColor-default,var(--color-canvas-default,#ffffff));border:1px solid var(--control-borderColor-rest,var(--borderColor-default,var(--color-border-default,#d0d7de)));border-radius:6px;outline:none;box-shadow:var(--shadow-inset,var(--color-primer-shadow-inset,inset 0 1px 0 rgba(208,215,222,0.2)));display:-webkit-inline-box;display:-webkit-inline-flex;display:-ms-inline-flexbox;display:inline-flex;-webkit-align-items:stretch;-webkit-box-align:stretch;-ms-flex-align:stretch;align-items:stretch;min-height:var(--base-size-32);overflow:hidden;--inner-action-size:var(--base-size-24);}/*!sc*/ .hLzFvi input,.hLzFvi textarea{cursor:text;}/*!sc*/ .hLzFvi select{cursor:pointer;}/*!sc*/ .hLzFvi input::-webkit-input-placeholder,.hLzFvi textarea::-webkit-input-placeholder,.hLzFvi select::-webkit-input-placeholder{color:var(---control-fgColor-placeholder,var(--fgColor-muted,var(--color-fg-muted,#656d76)));}/*!sc*/ .hLzFvi input::-moz-placeholder,.hLzFvi textarea::-moz-placeholder,.hLzFvi select::-moz-placeholder{color:var(---control-fgColor-placeholder,var(--fgColor-muted,var(--color-fg-muted,#656d76)));}/*!sc*/ .hLzFvi input:-ms-input-placeholder,.hLzFvi textarea:-ms-input-placeholder,.hLzFvi select:-ms-input-placeholder{color:var(---control-fgColor-placeholder,var(--fgColor-muted,var(--color-fg-muted,#656d76)));}/*!sc*/ .hLzFvi input::placeholder,.hLzFvi textarea::placeholder,.hLzFvi select::placeholder{color:var(---control-fgColor-placeholder,var(--fgColor-muted,var(--color-fg-muted,#656d76)));}/*!sc*/ .hLzFvi:where([data-trailing-action][data-focused]),.hLzFvi:where(:not([data-trailing-action]):focus-within){border-color:var(--fgColor-accent,var(--color-accent-fg,#0969da));outline:2px solid var(--fgColor-accent,var(--color-accent-fg,#0969da));outline-offset:-1px;}/*!sc*/ .hLzFvi > textarea{padding:var(--base-size-12);}/*!sc*/ .hLzFvi:where([data-contrast]){background-color:var(--bgColor-inset,var(--color-canvas-inset,#f6f8fa));}/*!sc*/ .hLzFvi:where([data-disabled]){color:var(--fgColor-disabled,var(--color-primer-fg-disabled,#8c959f));background-color:var(--control-bgColor-disabled,var(--color-input-disabled-bg,rgba(175,184,193,0.2)));box-shadow:none;border-color:var(--control-borderColor-disabled,var(--borderColor-default,var(--color-border-default,#d0d7de)));}/*!sc*/ .hLzFvi:where([data-disabled]) input,.hLzFvi:where([data-disabled]) textarea,.hLzFvi:where([data-disabled]) select{cursor:not-allowed;}/*!sc*/ .hLzFvi:where([data-monospace]){font-family:var(--fontStack-monospace,SFMono-Regular,Consolas,"Liberation Mono",Menlo,Courier,monospace);}/*!sc*/ .hLzFvi:where([data-validation='error']){border-color:var(--borderColor-danger-emphasis,var(--color-danger-emphasis,#cf222e));}/*!sc*/ .hLzFvi:where([data-validation='error']):where([data-trailing-action][data-focused]),.hLzFvi:where([data-validation='error']):where(:not([data-trailing-action])):focus-within{border-color:var(--fgColor-accent,var(--color-accent-fg,#0969da));outline:2px solid var(--fgColor-accent,var(--color-accent-fg,#0969da));outline-offset:-1px;}/*!sc*/ .hLzFvi:where([data-validation='success']){border-color:var(--bgColor-success-emphasis,var(--color-success-emphasis,#1f883d));}/*!sc*/ .hLzFvi:where([data-block]){width:100%;display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-align-self:stretch;-ms-flex-item-align:stretch;align-self:stretch;}/*!sc*/ @media (min-width:768px){.hLzFvi{font-size:var(--text-body-size-medium);}}/*!sc*/ .hLzFvi:where([data-size='small']){--inner-action-size:var(--base-size-20);min-height:var(--base-size-28);padding-top:3px;padding-right:var(--base-size-8);padding-bottom:3px;padding-left:var(--base-size-8);font-size:var(--text-body-size-small);line-height:var(--base-size-20);}/*!sc*/ .hLzFvi:where([data-size='large']){--inner-action-size:var(--base-size-28);height:var(--base-size-40);padding-top:10px;padding-right:var(--base-size-8);padding-bottom:10px;padding-left:var(--base-size-8);}/*!sc*/ .hLzFvi:where([data-variant='small']){min-height:28px;padding-top:3px;padding-right:var(--base-size-8);padding-bottom:3px;padding-left:var(--base-size-8);font-size:(--text-body-size-small);line-height:var(--base-size-20);}/*!sc*/ .hLzFvi:where([data-variant='large']){padding-top:10px;padding-right:var(--base-size-8);padding-bottom:10px;padding-left:var(--base-size-8);font-size:var(--text-title-size-medium);}/*!sc*/ .hLzFvi{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;min-width:160px;}/*!sc*/ data-styled.g14[id="TextInputWrapper__StyledTextInputBaseWrapper-sc-1mqhpbi-0"]{content:"hLzFvi,"}/*!sc*/ .iHYdQq{background-repeat:no-repeat;background-position:right 8px center;padding-right:0;padding-left:0;}/*!sc*/ .iHYdQq > :not(:last-child){margin-right:8px;}/*!sc*/ .iHYdQq .TextInput-icon,.iHYdQq .TextInput-action{-webkit-align-self:center;-ms-flex-item-align:center;align-self:center;color:var(--fgColor-muted,var(--color-fg-muted,#656d76));-webkit-flex-shrink:0;-ms-flex-negative:0;flex-shrink:0;}/*!sc*/ .iHYdQq > input,.iHYdQq > select{padding-right:0;padding-left:0;}/*!sc*/ .iHYdQq:where([data-leading-visual]){padding-left:var(--base-size-12);}/*!sc*/ .iHYdQq:where([data-trailing-visual]:not([data-trailing-action])){padding-right:var(--base-size-12);}/*!sc*/ .iHYdQq:where(:not([data-leading-visual])) > input,.iHYdQq:where(:not([data-leading-visual])) > select{padding-left:var(--base-size-12);}/*!sc*/ .iHYdQq:where(:not([data-trailing-visual]):not([data-trailing-action])) > input,.iHYdQq:where(:not([data-trailing-visual]):not([data-trailing-action])) > select{padding-right:var(--base-size-12);}/*!sc*/ .iHYdQq{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;min-width:160px;}/*!sc*/ data-styled.g15[id="TextInputWrapper__StyledTextInputWrapper-sc-1mqhpbi-1"]{content:"iHYdQq,"}/*!sc*/ .hWlpPn{position:relative;display:inline-block;}/*!sc*/ .hWlpPn::after{position:absolute;z-index:1000000;display:none;padding:0.5em 0.75em;font:normal normal 11px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI","Noto Sans",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji";-webkit-font-smoothing:subpixel-antialiased;color:var(--tooltip-fgColor,var(--fgColor-onEmphasis,var(--color-fg-on-emphasis,#ffffff)));text-align:center;-webkit-text-decoration:none;text-decoration:none;text-shadow:none;text-transform:none;-webkit-letter-spacing:normal;-moz-letter-spacing:normal;-ms-letter-spacing:normal;letter-spacing:normal;word-wrap:break-word;white-space:pre;pointer-events:none;content:attr(aria-label);background:var(--tooltip-bgColor,var(--bgColor-emphasis,var(--color-neutral-emphasis-plus,#24292f)));border-radius:6px;opacity:0;}/*!sc*/ @-webkit-keyframes tooltip-appear{from{opacity:0;}to{opacity:1;}}/*!sc*/ @keyframes tooltip-appear{from{opacity:0;}to{opacity:1;}}/*!sc*/ .hWlpPn:hover::after,.hWlpPn:active::after,.hWlpPn:focus::after,.hWlpPn:focus-within::after{display:inline-block;-webkit-text-decoration:none;text-decoration:none;-webkit-animation-name:tooltip-appear;animation-name:tooltip-appear;-webkit-animation-duration:0.1s;animation-duration:0.1s;-webkit-animation-fill-mode:forwards;animation-fill-mode:forwards;-webkit-animation-timing-function:ease-in;animation-timing-function:ease-in;-webkit-animation-delay:0s;animation-delay:0s;}/*!sc*/ .hWlpPn.tooltipped-no-delay:hover::after,.hWlpPn.tooltipped-no-delay:active::after,.hWlpPn.tooltipped-no-delay:focus::after,.hWlpPn.tooltipped-no-delay:focus-within::after{-webkit-animation-delay:0s;animation-delay:0s;}/*!sc*/ .hWlpPn.tooltipped-multiline:hover::after,.hWlpPn.tooltipped-multiline:active::after,.hWlpPn.tooltipped-multiline:focus::after,.hWlpPn.tooltipped-multiline:focus-within::after{display:table-cell;}/*!sc*/ .hWlpPn.tooltipped-s::after,.hWlpPn.tooltipped-se::after,.hWlpPn.tooltipped-sw::after{top:100%;right:50%;margin-top:6px;}/*!sc*/ .hWlpPn.tooltipped-se::after{right:auto;left:50%;margin-left:-16px;}/*!sc*/ .hWlpPn.tooltipped-sw::after{margin-right:-16px;}/*!sc*/ .hWlpPn.tooltipped-n::after,.hWlpPn.tooltipped-ne::after,.hWlpPn.tooltipped-nw::after{right:50%;bottom:100%;margin-bottom:6px;}/*!sc*/ .hWlpPn.tooltipped-ne::after{right:auto;left:50%;margin-left:-16px;}/*!sc*/ .hWlpPn.tooltipped-nw::after{margin-right:-16px;}/*!sc*/ .hWlpPn.tooltipped-s::after,.hWlpPn.tooltipped-n::after{-webkit-transform:translateX(50%);-ms-transform:translateX(50%);transform:translateX(50%);}/*!sc*/ .hWlpPn.tooltipped-w::after{right:100%;bottom:50%;margin-right:6px;-webkit-transform:translateY(50%);-ms-transform:translateY(50%);transform:translateY(50%);}/*!sc*/ .hWlpPn.tooltipped-e::after{bottom:50%;left:100%;margin-left:6px;-webkit-transform:translateY(50%);-ms-transform:translateY(50%);transform:translateY(50%);}/*!sc*/ .hWlpPn.tooltipped-multiline::after{width:-webkit-max-content;width:-moz-max-content;width:max-content;max-width:250px;word-wrap:break-word;white-space:pre-line;border-collapse:separate;}/*!sc*/ .hWlpPn.tooltipped-multiline.tooltipped-s::after,.hWlpPn.tooltipped-multiline.tooltipped-n::after{right:auto;left:50%;-webkit-transform:translateX(-50%);-ms-transform:translateX(-50%);transform:translateX(-50%);}/*!sc*/ .hWlpPn.tooltipped-multiline.tooltipped-w::after,.hWlpPn.tooltipped-multiline.tooltipped-e::after{right:100%;}/*!sc*/ .hWlpPn.tooltipped-align-right-2::after{right:0;margin-right:0;}/*!sc*/ .hWlpPn.tooltipped-align-left-2::after{left:0;margin-left:0;}/*!sc*/ data-styled.g17[id="Tooltip__TooltipBase-sc-17tf59c-0"]{content:"hWlpPn,"}/*!sc*/ .liVpTx{display:inline-block;overflow:hidden;text-overflow:ellipsis;vertical-align:top;white-space:nowrap;max-width:125px;}/*!sc*/ data-styled.g19[id="Truncate__StyledTruncate-sc-23o1d2-0"]{content:"liVpTx,"}/*!sc*/ .eBevHz{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;padding-inline:var(--stack-padding-normal,16px);-webkit-box-pack:start;-webkit-justify-content:flex-start;-ms-flex-pack:start;justify-content:flex-start;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;min-height:var(--control-xlarge-size,48px);box-shadow:inset 0px -1px var(--borderColor-muted,var(--borderColor-muted,var(--color-border-muted,hsla(210,18%,87%,1))));-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;border-bottom:none;max-width:100%;padding-left:8px;padding-right:8px;}/*!sc*/ data-styled.g99[id="UnderlineTabbedInterface__StyledComponentUnderlineWrapper-sc-4ilrg0-0"]{content:"eBevHz,"}/*!sc*/ .ehEdWC{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;list-style:none;white-space:nowrap;padding:0;margin:0;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;gap:8px;position:relative;}/*!sc*/ data-styled.g100[id="UnderlineTabbedInterface__StyledComponentUnderlineItemList-sc-4ilrg0-1"]{content:"ehEdWC,"}/*!sc*/ .beOdPj{-webkit-appearance:none;-moz-appearance:none;appearance:none;background-color:transparent;border:0;cursor:pointer;font:inherit;position:relative;display:-webkit-inline-box;display:-webkit-inline-flex;display:-ms-inline-flexbox;display:inline-flex;color:var(--fgColor-default,var(--color-fg-default,#1F2328));text-align:center;-webkit-text-decoration:none;text-decoration:none;line-height:var(--text-body-lineHeight-medium,1.4285);border-radius:var(--borderRadius-medium,6px);font-size:var(--text-body-size-medium,14px);padding-inline:var(--control-medium-paddingInline-condensed,8px);padding-block:var(--control-medium-paddingBlock,6px);-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;}/*!sc*/ @media (hover:hover){.beOdPj:hover{background-color:var(--bgColor-neutral-muted,var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5))));-webkit-transition:background 0.12s ease-out;transition:background 0.12s ease-out;-webkit-text-decoration:none;text-decoration:none;}}/*!sc*/ .beOdPj:focus:{outline:2px solid transparent;box-shadow:inset 0 0 0 2px var(--fgColor-accent,var(--fgColor-accent,var(--color-accent-fg,#0969da)));}/*!sc*/ .beOdPj:focus::not(:focus-visible){box-shadow:none;}/*!sc*/ .beOdPj:focus-visible{outline:2px solid transparent;box-shadow:inset 0 0 0 2px var(--fgColor-accent,var(--fgColor-accent,var(--color-accent-fg,#0969da)));}/*!sc*/ .beOdPj [data-content]::before{content:attr(data-content);display:block;height:0;font-weight:var(--base-text-weight-semibold,500);visibility:hidden;white-space:nowrap;}/*!sc*/ .beOdPj [data-component='icon']{color:var(--fgColor-muted,var(--fgColor-muted,var(--color-fg-muted,#656d76)));-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;display:-webkit-inline-box;display:-webkit-inline-flex;display:-ms-inline-flexbox;display:inline-flex;margin-inline-end:var(--control-medium-gap,8px);}/*!sc*/ .beOdPj [data-component='counter']{margin-inline-start:var(--control-medium-gap,8px);display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;}/*!sc*/ .beOdPj::after{position:absolute;right:50%;bottom:calc(50% - calc(var(--control-xlarge-size,48px) / 2 + 1px));width:100%;height:2px;content:'';background-color:transparent;border-radius:0;-webkit-transform:translate(50%,-50%);-ms-transform:translate(50%,-50%);transform:translate(50%,-50%);}/*!sc*/ .beOdPj[aria-current]:not([aria-current='false']) [data-component='text'],.beOdPj[aria-selected='true'] [data-component='text']{font-weight:var(--base-text-weight-semibold,500);}/*!sc*/ .beOdPj[aria-current]:not([aria-current='false'])::after,.beOdPj[aria-selected='true']::after{background-color:var(--underlineNav-borderColor-active,var(--color-primer-border-active,#fd8c73));}/*!sc*/ @media (forced-colors:active){.beOdPj[aria-current]:not([aria-current='false'])::after,.beOdPj[aria-selected='true']::after{background-color:LinkText;}}/*!sc*/ data-styled.g101[id="UnderlineTabbedInterface__StyledUnderlineItem-sc-4ilrg0-2"]{content:"beOdPj,"}/*!sc*/ </style> <!-- --> <!-- --> <div class="Box-sc-g0xbh4-0 iVEunk"><div class="Box-sc-g0xbh4-0 jzuOtQ"><div class="Box-sc-g0xbh4-0 bGojzy"></div></div><div class="Box-sc-g0xbh4-0 iNSVHo"><div class="Box-sc-g0xbh4-0 bVgnfw"><div class="Box-sc-g0xbh4-0 CEgMp"><button type="button" aria-haspopup="true" aria-expanded="false" tabindex="0" aria-label="main branch" data-testid="anchor-button" class="Box-sc-g0xbh4-0 gMOVLe prc-Button-ButtonBase-c50BI overview-ref-selector width-full" data-loading="false" data-size="medium" data-variant="default" aria-describedby="branch-picker-repos-header-ref-selector-loading-announcement" id="branch-picker-repos-header-ref-selector"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x"><div class="Box-sc-g0xbh4-0 bZBlpz"><div class="Box-sc-g0xbh4-0 lhTYNA"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></div><div class="Box-sc-g0xbh4-0 ffLUq ref-selector-button-text-container"><span class="Text__StyledText-sc-17v1xeu-0 eMMFM"> <!-- -->main</span></div></div></span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><button hidden="" data-hotkey-scope="read-only-cursor-text-area"></button></div><div class="Box-sc-g0xbh4-0 fLXEGX"><a style="--button-color:fg.muted" type="button" href="/NVIDIA/cutlass/branches" class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rclab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Branches</span></span></a><a style="--button-color:fg.muted" type="button" href="/NVIDIA/cutlass/tags" class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rklab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Tags</span></span></a></div><div class="Box-sc-g0xbh4-0 dqfxud"><a style="--button-color:fg.muted" type="button" aria-label="Go to Branches page" href="/NVIDIA/cutlass/branches" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Relab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></a><a style="--button-color:fg.muted" type="button" aria-label="Go to Tags page" href="/NVIDIA/cutlass/tags" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Rmlab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></a></div></div><div class="Box-sc-g0xbh4-0 jxTzTd"><div class="Box-sc-g0xbh4-0 gqqBXN"><div class="Box-sc-g0xbh4-0 dzXgxt"><!--$--><div class="Box-sc-g0xbh4-0 iWFGlI"><span class="TextInputWrapper__StyledTextInputBaseWrapper-sc-1mqhpbi-0 hLzFvi TextInputWrapper__StyledTextInputWrapper-sc-1mqhpbi-1 iHYdQq TextInput-wrapper" data-leading-visual="true" data-trailing-visual="true" aria-busy="false"><span class="TextInput-icon" id=":R2j5ab:" aria-hidden="true"><svg aria-hidden="true" focusable="false" class="octicon octicon-search" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path></svg></span><input type="text" aria-label="Go to file" role="combobox" aria-controls="file-results-list" aria-expanded="false" aria-haspopup="dialog" autoCorrect="off" spellcheck="false" placeholder="Go to file" aria-describedby=":R2j5ab: :R2j5abH1:" data-component="input" class="UnstyledTextInput__ToggledUnstyledTextInput-sc-14ypya-0 jkNcAv" value=""/><span class="TextInput-icon" id=":R2j5abH1:" aria-hidden="true"></span></span></div><!--/$--></div><div class="Box-sc-g0xbh4-0 YUPas"><button type="button" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":Rr5ab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x">Go to file</span></span></button></div><div class="react-directory-add-file-icon"></div><div class="react-directory-remove-file-icon"></div></div><button type="button" aria-haspopup="true" aria-expanded="false" tabindex="0" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="primary" aria-describedby=":R55ab:-loading-announcement" id=":R55ab:"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="hide-sm" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Code</span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><div class="Box-sc-g0xbh4-0 izFOf"><button data-component="IconButton" type="button" aria-label="Open more actions menu" aria-haspopup="true" aria-expanded="false" tabindex="0" class="prc-Button-ButtonBase-c50BI prc-Button-IconButton-szpyj" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":R75ab:-loading-announcement" id=":R75ab:"><svg aria-hidden="true" focusable="false" class="octicon octicon-kebab-horizontal" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path></svg></button></div></div></div><div class="Box-sc-g0xbh4-0 vIPPs"><div data-hpc="true"><button hidden="" data-testid="focus-next-element-button" data-hotkey="j"></button><button hidden="" data-testid="focus-previous-element-button" data-hotkey="k"></button><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading" id="folders-and-files">Folders and files</h2><table aria-labelledby="folders-and-files" class="Box-sc-g0xbh4-0 fdROMU"><thead class="Box-sc-g0xbh4-0 jGKpsv"><tr class="Box-sc-g0xbh4-0 jdgHnn"><th colSpan="2" class="Box-sc-g0xbh4-0 bQivRW"><span class="text-bold">Name</span></th><th colSpan="1" class="Box-sc-g0xbh4-0 ldkMIO"><span class="text-bold">Name</span></th><th class="hide-sm"><div title="Last commit message" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit message</span></div></th><th colSpan="1" class="Box-sc-g0xbh4-0 jMbWeI"><div title="Last commit date" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit date</span></div></th></tr></thead><tbody><tr class="Box-sc-g0xbh4-0 gpqjiB"><td colSpan="3" class="bgColor-muted p-1 rounded-top-2"><div class="Box-sc-g0xbh4-0 dzCJzi"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">Latest commit</h2><div style="width:120px" class="Skeleton Skeleton--text" data-testid="loading"> </div><div class="d-flex flex-shrink-0 gap-2"><div data-testid="latest-commit-details" class="d-none d-sm-flex flex-items-center"></div><div class="d-flex gap-2"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">History</h2><a href="/NVIDIA/cutlass/commits/main/" class="prc-Button-ButtonBase-c50BI d-none d-lg-flex LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":Raqj8pab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x"><span class="fgColor-default">601 Commits</span></span></span></a><div class="d-sm-none"></div><div class="d-flex d-lg-none"><span role="tooltip" aria-label="601 Commits" id="history-icon-button-tooltip" class="Tooltip__TooltipBase-sc-17tf59c-0 hWlpPn tooltipped-n"><a href="/NVIDIA/cutlass/commits/main/" class="prc-Button-ButtonBase-c50BI LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":R1iqj8pab:-loading-announcement history-icon-button-tooltip"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span></span></a></span></div></div></div></div></td></tr><tr class="react-directory-row undefined" id="folder-row-0"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".github" aria-label=".github, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/.github">.github</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".github" aria-label=".github, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/.github">.github</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-1"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cmake" aria-label="cmake, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/cmake">cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cmake" aria-label="cmake, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/cmake">cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-2"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="docs" aria-label="docs, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/docs">docs</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="docs" aria-label="docs, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/docs">docs</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-3"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="examples" aria-label="examples, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/examples">examples</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="examples" aria-label="examples, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/examples">examples</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-4"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="include" aria-label="include, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/include">include</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="include" aria-label="include, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/include">include</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-5"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="media" aria-label="media, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/media">media</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="media" aria-label="media, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/media">media</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-6"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="python" aria-label="python, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/python">python</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="python" aria-label="python, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/python">python</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-7"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="test" aria-label="test, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/test">test</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="test" aria-label="test, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/test">test</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-8"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tools" aria-label="tools, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/tools">tools</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tools" aria-label="tools, (Directory)" class="Link--primary" href="/NVIDIA/cutlass/tree/main/tools">tools</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-9"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitignore" aria-label=".gitignore, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/.gitignore">.gitignore</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitignore" aria-label=".gitignore, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/.gitignore">.gitignore</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-10"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitmodules" aria-label=".gitmodules, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/.gitmodules">.gitmodules</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitmodules" aria-label=".gitmodules, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/.gitmodules">.gitmodules</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-11"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="ACTIVE_DEVELOPERS.md" aria-label="ACTIVE_DEVELOPERS.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/ACTIVE_DEVELOPERS.md">ACTIVE_DEVELOPERS.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="ACTIVE_DEVELOPERS.md" aria-label="ACTIVE_DEVELOPERS.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/ACTIVE_DEVELOPERS.md">ACTIVE_DEVELOPERS.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-12"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CHANGELOG.md" aria-label="CHANGELOG.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CHANGELOG.md">CHANGELOG.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CHANGELOG.md" aria-label="CHANGELOG.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CHANGELOG.md">CHANGELOG.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-13"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CITATION.cff" aria-label="CITATION.cff, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CITATION.cff">CITATION.cff</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CITATION.cff" aria-label="CITATION.cff, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CITATION.cff">CITATION.cff</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-14"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CMakeLists.txt" aria-label="CMakeLists.txt, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CMakeLists.txt">CMakeLists.txt</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CMakeLists.txt" aria-label="CMakeLists.txt, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CMakeLists.txt">CMakeLists.txt</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-15"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CUDA.cmake" aria-label="CUDA.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CUDA.cmake">CUDA.cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CUDA.cmake" aria-label="CUDA.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/CUDA.cmake">CUDA.cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-16"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="Doxyfile" aria-label="Doxyfile, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/Doxyfile">Doxyfile</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="Doxyfile" aria-label="Doxyfile, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/Doxyfile">Doxyfile</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-17"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE.txt" aria-label="LICENSE.txt, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/LICENSE.txt">LICENSE.txt</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE.txt" aria-label="LICENSE.txt, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/LICENSE.txt">LICENSE.txt</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-18"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="PUBLICATIONS.md" aria-label="PUBLICATIONS.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/PUBLICATIONS.md">PUBLICATIONS.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="PUBLICATIONS.md" aria-label="PUBLICATIONS.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/PUBLICATIONS.md">PUBLICATIONS.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-19"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-20"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="bin2hex.cmake" aria-label="bin2hex.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/bin2hex.cmake">bin2hex.cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="bin2hex.cmake" aria-label="bin2hex.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/bin2hex.cmake">bin2hex.cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-21"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cuBLAS.cmake" aria-label="cuBLAS.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/cuBLAS.cmake">cuBLAS.cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cuBLAS.cmake" aria-label="cuBLAS.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/cuBLAS.cmake">cuBLAS.cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-22"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cuDNN.cmake" aria-label="cuDNN.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/cuDNN.cmake">cuDNN.cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="cuDNN.cmake" aria-label="cuDNN.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/cuDNN.cmake">cuDNN.cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-23"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="customConfigs.cmake" aria-label="customConfigs.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/customConfigs.cmake">customConfigs.cmake</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="customConfigs.cmake" aria-label="customConfigs.cmake, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/customConfigs.cmake">customConfigs.cmake</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-24"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="pyproject.toml" aria-label="pyproject.toml, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/pyproject.toml">pyproject.toml</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="pyproject.toml" aria-label="pyproject.toml, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/pyproject.toml">pyproject.toml</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-25"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="setup.cfg" aria-label="setup.cfg, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/setup.cfg">setup.cfg</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="setup.cfg" aria-label="setup.cfg, (File)" class="Link--primary" href="/NVIDIA/cutlass/blob/main/setup.cfg">setup.cfg</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="Box-sc-g0xbh4-0 eNCcrz show-for-mobile" data-testid="view-all-files-row"><td colSpan="3" class="Box-sc-g0xbh4-0 bHTcCe"><div><button class="prc-Link-Link-85e08">View all files</button></div></td></tr></tbody></table></div><div class="Box-sc-g0xbh4-0 csrIcr"><div class="Box-sc-g0xbh4-0 bUQNHB"><div itemscope="" itemType="https://schema.org/abstract" class="Box-sc-g0xbh4-0 jPdcfu"><h2 class="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0 brGdpi">Repository files navigation</h2><nav class="UnderlineTabbedInterface__StyledComponentUnderlineWrapper-sc-4ilrg0-0 eBevHz" aria-label="Repository files"><ul role="list" class="UnderlineTabbedInterface__StyledComponentUnderlineItemList-sc-4ilrg0-1 ehEdWC"><li class="Box-sc-g0xbh4-0 hUCRAk"><a href="#" aria-current="page" class="UnderlineTabbedInterface__StyledUnderlineItem-sc-4ilrg0-2 beOdPj"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-book" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path></svg></span><span data-component="text" data-content="README">README</span></a></li><li class="Box-sc-g0xbh4-0 hUCRAk"><a href="#" class="UnderlineTabbedInterface__StyledUnderlineItem-sc-4ilrg0-2 beOdPj"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-law" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path></svg></span><span data-component="text" data-content="License">License</span></a></li></ul></nav><button style="--button-color:fg.subtle" type="button" aria-label="Outline" aria-haspopup="true" aria-expanded="false" tabindex="0" class="Box-sc-g0xbh4-0 cwoBXV prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rr9ab:-loading-announcement" id=":Rr9ab:"><svg aria-hidden="true" focusable="false" class="octicon octicon-list-unordered" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible"><path d="M5.75 2.5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5Zm0 5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5Zm0 5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5ZM2 14a1 1 0 1 1 0-2 1 1 0 0 1 0 2Zm1-6a1 1 0 1 1-2 0 1 1 0 0 1 2 0ZM2 4a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path></svg></button></div><div class="Box-sc-g0xbh4-0 QkQOb js-snippet-clipboard-copy-unpositioned undefined" data-hpc="true"><article class="markdown-body entry-content container-lg" itemprop="text"><p dir="auto"><a target="_blank" rel="noopener noreferrer" href="/NVIDIA/cutlass/blob/main/media/images/gemm-hierarchy-with-epilogue-no-labels.png"><img src="/NVIDIA/cutlass/raw/main/media/images/gemm-hierarchy-with-epilogue-no-labels.png" alt="ALT" title="Complete CUDA GEMM decomposition" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">CUTLASS 3.8.0</h1><a id="user-content-cutlass-380" class="anchor" aria-label="Permalink: CUTLASS 3.8.0" href="#cutlass-380"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><em>CUTLASS 3.8.0 - January 2025</em></p> <p dir="auto">CUTLASS is a collection of CUDA C++ template abstractions for implementing high-performance matrix-matrix multiplication (GEMM) and related computations at all levels and scales within CUDA. It incorporates strategies for hierarchical decomposition and data movement similar to those used to implement cuBLAS and cuDNN. CUTLASS decomposes these "moving parts" into reusable, modular software components abstracted by C++ template classes. Primitives for different levels of a conceptual parallelization hierarchy can be specialized and tuned via custom tiling sizes, data types, and other algorithmic policy. The resulting flexibility simplifies their use as building blocks within custom kernels and applications.</p> <p dir="auto">To support a wide variety of applications, CUTLASS provides extensive support for mixed-precision computations, providing specialized data-movement and multiply-accumulate abstractions for FP64, FP32, TF32, FP16, BF16, <a href="/NVIDIA/cutlass/blob/main/examples/27_ampere_3xtf32_fast_accurate_tensorop_gemm">FP32 emulation via tensor core instruction</a>, 8b floating point types (e5m2 and e4m3), block scaled data types (NVIDIA NVFP4 and OCP standard MXFP4, MXFP6, MXFP8), narrow integer types (4 and 8b signed and unsigned integers), and binary 1b data types (where architectures allow for the native support of such data types). CUTLASS demonstrates optimal matrix multiply operations targeting the programmable, high-throughput <em>Tensor Cores</em> implemented by NVIDIA's Volta, Turing, Ampere, Ada, Hopper, and Blackwell architectures.</p> <p dir="auto">In addition to GEMMs, CUTLASS implements high-performance convolution via the implicit GEMM algorithm. Implicit GEMM is the formulation of a convolution operation as a GEMM thereby taking advantage of CUTLASS's modular GEMM pipeline. This allows CUTLASS to build convolutions by reusing highly-optimized GEMM components.</p> <p dir="auto">See the <a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md">Quick Start Guide</a> to get started quickly.</p> <p dir="auto">See the <a href="/NVIDIA/cutlass/blob/main/media/docs/functionality.md">functionality docs</a> for a more comprehensive list of kernel level features, data types, instructions, and minimum supported by CUTLASS on each GPU architecture.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">What's New in CUTLASS 3.8</h1><a id="user-content-whats-new-in-cutlass-38" class="anchor" aria-label="Permalink: What's New in CUTLASS 3.8" href="#whats-new-in-cutlass-38"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS 3.8 is the first release that supports the NVIDIA Blackwell SM100 architecture. For a background on Blackwell's new features, please consult the PTX documentation for CUDA 12.8.</p> <ul dir="auto"> <li>Support for new CuTe building blocks specifically for Blackwell architecture: <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/include/cute/atom/mma_traits_sm100.hpp">5th generation Blackwell Tensor Core instructions (TCGen05)</a> via CuTe MMA atoms.</li> <li>Extensions to <a href="/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100_tma.hpp">Tensor Memory Accelerator</a> via CuTe Copy atoms.</li> <li>Exposure of Blackwell's new tensor memory (note: distinct from TMA) as <a href="/NVIDIA/cutlass/blob/main/include/cute/pointer.hpp#L290"><code>tmem</code></a> across CuTe as a first class data locale.</li> <li>Exposure of <a href="/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp"><code>tmem-&gt;rmem</code>, <code>rmem-&gt;tmem</code> and <code>smem-&gt;tmem data movement instructions</code></a> as copy atoms in CuTe.</li> <li><a href="/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp"><code>make_tmem_copy()</code></a> utility method to ease creation of tiled copies for tmem copy atoms.</li> <li>Support for <a href="/NVIDIA/cutlass/blob/main/include/cute/atom/copy_traits_sm100.hpp">new variants of LDSM on Blackwell</a> via CuTe Copy atoms.</li> </ul> </li> <li>Support for new CUTLASS building blocks specifically for Blackwell architecture: <ul dir="auto"> <li>Various narrow precision <a href="/NVIDIA/cutlass/blob/main/include/cutlass/exmy_base.h">FP4, FP6, and FP8</a> formats as well as their <a href="/NVIDIA/cutlass/blob/main/include/cutlass/float_subbyte.h">block-scaled variants NVFP4, MXFP4, MXFP6, and MXFP8</a></li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/pipeline/sm100_pipeline.hpp">Pipelines that implement Blackwell specific synchronization</a>.</li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/cluster_launch.hpp">Cluster launch control API supporting preferred and fallback cluster shapes</a>.</li> <li>Data types including NVFP4, MXFP4, MXFP6, and MXFP8 and all their supported element and scale factor types.</li> <li>Tile schedulers using <a href="/NVIDIA/cutlass/blob/main/media/docs/blackwell_cluster_launch_control.md">Blackwell's Cluster Launch Control (CLC) feature</a> to implement dynamic persistence scheduling for <a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_tile_scheduler.hpp">GEMMs</a>, and <a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_tile_scheduler_stream_k.hpp">stream-K</a>.</li> <li>Extensions to testbeds and reference check code for unit tests and CUTLASS profiler.</li> </ul> </li> <li>Full support for Blackwell kernels in CUTLASS 3.x API: <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm100_gemm_tma_warpspecialized.hpp">Blackwell specific kernel layers</a> that <ul dir="auto"> <li>Implement a new warp-specialization recipe tuned specifically for Blackwell.</li> <li>Leverage all the new features such as CLC based tile scheduling, preferred cluster, and TMEM based double buffering of accumulators.</li> <li>Support stream-K load balancing for all kernel types everywhere via composable scheduler support.</li> </ul> </li> <li>Blackwell collective mainloops that target the TCGen05 MMA instructions (both SS and TS) for <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_mma_warpspecialized.hpp">Non-block scaled data types without support for pointer array and grouped GEMM with TMA</a></li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_mma_array_warpspecialized.hpp">Non-block scaled data types with support for pointer array and grouped GEMM with TMA</a></li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_warpspecialized.hpp">Block scaled data types without support for pointer array and grouped GEMM with TMA</a></li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/collective/sm100_blockscaled_mma_array_warpspecialized.hpp">Block scaled data types with support for pointer array and grouped GEMM with TMA</a></li> </ul> </li> <li>Blackwell <a href="/NVIDIA/cutlass/blob/main/include/cutlass/conv/collective/sm100_implicit_gemm_umma_warpspecialized.hpp">collective mainloop for convolution kernels</a> supporting non-block scaled data types for fprop, dgrad, and wgrad.</li> <li>New <a href="/NVIDIA/cutlass/blob/main/include/cutlass/gemm/dispatch_policy.hpp">GEMM</a>, <a href="/NVIDIA/cutlass/blob/main/include/cutlass/conv/dispatch_policy.hpp">convolution</a>, and <a href="/NVIDIA/cutlass/blob/main/include/cutlass/epilogue/dispatch_policy.hpp">epilogue</a> dispatch policies for collectives, kernel layers, and builders.</li> <li><a href="/NVIDIA/cutlass/blob/main/include/cutlass/epilogue/collective/sm100_epilogue_tma_warpspecialized.hpp">Blackwell epilogue that supports loading accumulators from <code>tmem</code></a> and <a href="/NVIDIA/cutlass/blob/main">full set of EVT fusions</a>.</li> </ul> </li> <li>CUTLASS library and profiler integration for block scaled data types for kernel emission, profiling, and verification. <ul dir="auto"> <li>Support for preferred and fallback cluster shapes via profiler command line arguments parsing to set dynamic cluster shapes.</li> <li>Support for dynamic datatypes by parsing profiler via profiler command line arguments parsing to set dynamic datatype setting in TCGen05 MMA instruction descriptors.</li> </ul> </li> <li>Set of examples that demonstrate the usage of the 3.x API for targeting Blackwell <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/examples/70_blackwell_gemm">Basic FP16 and FP8 GEMMs with minimal changes from Hopper examples</a>, demonstrating ease of migration for off the shelf kernels using the 3.x collective builder API.</li> <li>GEMM with <a href="/NVIDIA/cutlass/blob/main/examples/71_blackwell_gemm_with_collective_builder/71_blackwell_gemm_with_collective_builder.cu">opt-in collective builder schedules showcasing available recipes</a> for Blackwell.</li> <li>Block scaled data type GEMMs targeting Blackwell's native block scaled Tensor Cores: <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72a_blackwell_nvfp4_bf16_gemm.cu">NVFP4 inputs with BF16 output</a></li> <li><a href="/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72b_blackwell_nvfp4_nvfp4_gemm.cu">NVFP4 inputs with NVFP4 output</a></li> <li><a href="/NVIDIA/cutlass/blob/main/examples/72_blackwell_narrow_precision_gemm/72c_blackwell_mixed_mxfp8_bf16_gemm.cu">Mixed MXFP8 and MXFP6 inputs with BF16 output</a></li> </ul> </li> <li>GEMM example demonstrating <a href="/NVIDIA/cutlass/blob/main/examples/73_blackwell_gemm_preferred_cluster/blackwell_gemm_preferred_cluster.cu">Blackwell's new preferred cluster support via dynamic cluster shapes</a> for increased occupancy.</li> <li><a href="/NVIDIA/cutlass/blob/main/examples/74_blackwell_gemm_streamk/blackwell_gemm_streamk.cu">GEMM with CLC based StreamK scheduler for load balancing</a>.</li> <li>Grouped GEMM for <a href="/NVIDIA/cutlass/blob/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm.cu">vanilla FP8 data inputs</a> and <a href="/NVIDIA/cutlass/blob/main/examples/75_blackwell_grouped_gemm/75_blackwell_grouped_gemm_block_scaled.cu">NVFP4 block scaled inputs</a>.</li> <li>Convolution kernels for <a href="/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_fprop.cu">fprop</a>, <a href="/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_dgrad.cu">dgrad</a>, and <a href="/NVIDIA/cutlass/blob/main/examples/76_blackwell_conv/76_blackwell_conv_wgrad.cu">wgrad</a>.</li> <li><a href="/NVIDIA/cutlass/blob/main/examples/77_blackwell_fmha/77_blackwell_fmha.cu">Fused multi-head attention fprop kernel</a> supporting fp16/bf16/fp8 data types across head dims of 32,64, and 128.</li> </ul> </li> <li>Documentation updates: <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#instantiating-a-blackwell-gemm-kernel">Quickstart - instantiating a Blackwell block-scaled GEMM</a>.</li> <li>Detailed <a href="/NVIDIA/cutlass/blob/main/media/docs/blackwell_functionality.md">Blackwell block-scaled GEMM functionality documentation</a></li> <li>A new <a href="/NVIDIA/cutlass/blob/main/media/docs/functionality.md">functionality documentation</a> specifically for 3.x API comprehensively documenting all supported kernel types, data types, kernel features, minimum CUDA tookit support etc for 3.x supported architectures.</li> <li>Updates to <a href="/NVIDIA/cutlass/blob/main/README.md#compatibility">compatibility</a> section regarding supported compilers, operating systems, CUDA Toolkits, Hardware Architectures, and <a href="/NVIDIA/cutlass/blob/main/README.md#Target-Architecture">Target Architecture</a>.</li> </ul> </li> </ul> <p dir="auto">Note: CUTLASS 3.x builds are known to be down on Windows platforms for all CUDA toolkits. CUTLASS team is working on a fix.</p> <p dir="auto"><strong>See the <a href="/NVIDIA/cutlass/blob/main/CHANGELOG.md">CHANGELOG</a> for details of all past releases and updates.</strong></p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Performance</h1><a id="user-content-performance" class="anchor" aria-label="Permalink: Performance" href="#performance"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels, they exhibit nearly optimal utilization of peak theoretical throughput. The figure below shows CUTLASS 3.8's performance as a % of theoretical peak utilization on various input and output data types when run on NVIDIA Blackwell SM100 architecture GPU.</p> <p align="center" dir="auto"><a target="_blank" rel="noopener noreferrer" href="/NVIDIA/cutlass/blob/main/media/images/cutlass-3.8-blackwell-gemm-peak-performance.svg"><img src="/NVIDIA/cutlass/raw/main/media/images/cutlass-3.8-blackwell-gemm-peak-performance.svg" style="max-width: 100%;"></a></p> <p dir="auto">The two figures below show the continual CUTLASS performance improvements on an <a href="https://www.nvidia.com/en-us/data-center/h100/" rel="nofollow">NVIDIA H100</a> (NVIDIA Hopper architecture) since CUTLASS 3.1. CUTLASS 3.5.1 was compiled with the <a href="https://developer.nvidia.com/cuda-downloads" rel="nofollow">CUDA 12.5u1 Toolkit</a>. Tensor Core operations are implemented using CUDA's <a href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma" rel="nofollow">mma</a> and <a href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions" rel="nofollow">wgmma</a> instructions.</p> <p align="center" dir="auto"><a target="_blank" rel="noopener noreferrer" href="/NVIDIA/cutlass/blob/main/media/images/cutlass-3.5.1-gemm-peak-performance.png"><img src="/NVIDIA/cutlass/raw/main/media/images/cutlass-3.5.1-gemm-peak-performance.png" style="max-width: 100%;"></a></p> <p align="center" dir="auto"><a target="_blank" rel="noopener noreferrer" href="/NVIDIA/cutlass/blob/main/media/images/cutlass-3.5.1-gemm-peak-performance-fp8.png"><img src="/NVIDIA/cutlass/raw/main/media/images/cutlass-3.5.1-gemm-peak-performance-fp8.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">CuTe</h1><a id="user-content-cute" class="anchor" aria-label="Permalink: CuTe" href="#cute"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS 3.0 introduced a new core library, CuTe, to describe and manipulate tensors of threads and data. CuTe is a collection of C++ CUDA template abstractions for defining and operating on hierarchically multidimensional layouts of threads and data. CuTe provides <code>Layout</code> and <code>Tensor</code> objects that compactly package the type, shape, memory space, and layout of data, while performing the complicated indexing for the user. This lets programmers focus on the logical descriptions of their algorithms while CuTe does the mechanical bookkeeping for them. With these tools, we can quickly design, implement, and modify all dense linear algebra operations.</p> <p dir="auto">The core abstractions of CuTe are hierarchically multidimensional layouts which can be composed with data arrays to represent tensors. The representation of layouts is powerful enough to represent nearly everything we need to implement efficient dense linear algebra. Layouts can also be combined and manipulated via functional composition, on which we build a large set of common operations such as tiling and partitioning.</p> <p dir="auto">CUTLASS 3.0 and beyond adopts CuTe throughout the GEMM hierarchy in its templates. This greatly simplifies the design and improves code composability and readability. More documentation specific to CuTe can be found in its <a href="/NVIDIA/cutlass/blob/main/media/docs/cute/00_quickstart.md">dedicated documentation directory</a>.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Compatibility</h1><a id="user-content-compatibility" class="anchor" aria-label="Permalink: Compatibility" href="#compatibility"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Minimum requirements:</p> <ul dir="auto"> <li>Architecture: Volta (compute capability 7.0)</li> <li>Compiler: Must support at least C++17</li> <li>CUDA Toolkit version: 11.4</li> </ul> <p dir="auto">CUTLASS requires a C++17 host compiler and performs best when built with the <a href="https://developer.nvidia.com/cuda-downloads" rel="nofollow"><strong>CUDA 12.8 Toolkit</strong></a>. It is also compatible with CUDA 11.4, CUDA 11.5, CUDA 11.6, CUDA 11.7, CUDA 11.8, and all other CUDA 12.x versions.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Operating Systems</h2><a id="user-content-operating-systems" class="anchor" aria-label="Permalink: Operating Systems" href="#operating-systems"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We have tested the following environments.</p> <markdown-accessiblity-table><table> <thead> <tr> <th><strong>Operating System</strong></th> <th><strong>Compiler</strong></th> </tr> </thead> <tbody> <tr> <td>Ubuntu 18.04</td> <td>GCC 7.5.0</td> </tr> <tr> <td>Ubuntu 20.04</td> <td>GCC 10.3.0</td> </tr> <tr> <td>Ubuntu 22.04</td> <td>GCC 11.2.0</td> </tr> </tbody> </table></markdown-accessiblity-table> <p dir="auto">Note: GCC 8.5.0 has known regressions regarding fold expressions and overloaded operators. Using GCC 7.5.0 or (preferred) GCC &gt;= 9 is recommended.</p> <p dir="auto">Note: CUTLASS 3.x builds are known to be down on Windows platforms for all CUDA toolkits. CUTLASS team is working on a fix.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Hardware</h2><a id="user-content-hardware" class="anchor" aria-label="Permalink: Hardware" href="#hardware"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS runs successfully on the following NVIDIA GPUs, and it is expected to be efficient on Volta, Turing, Ampere, Ada, and Hopper architecture based NVIDIA GPUs.</p> <markdown-accessiblity-table><table> <thead> <tr> <th><strong>GPU</strong></th> <th><strong>CUDA Compute Capability</strong></th> <th><strong>Minimum CUDA Toolkit Required by CUTLASS-3</strong></th> </tr> </thead> <tbody> <tr> <td>NVIDIA V100 Tensor Core GPU</td> <td>7.0</td> <td>11.4</td> </tr> <tr> <td>NVIDIA TitanV</td> <td>7.0</td> <td>11.4</td> </tr> <tr> <td>NVIDIA GeForce RTX 20x0 series</td> <td>7.5</td> <td>11.4</td> </tr> <tr> <td>NVIDIA T4</td> <td>7.5</td> <td>11.4</td> </tr> <tr> <td>NVIDIA A100 Tensor Core GPU</td> <td>8.0</td> <td>11.4</td> </tr> <tr> <td>NVIDIA A10</td> <td>8.6</td> <td>11.4</td> </tr> <tr> <td>NVIDIA GeForce RTX 30x0 series</td> <td>8.6</td> <td>11.4</td> </tr> <tr> <td>NVIDIA GeForce RTX 40x0 series</td> <td>8.9</td> <td>11.8</td> </tr> <tr> <td>NVIDIA L40</td> <td>8.9</td> <td>11.8</td> </tr> <tr> <td>NVIDIA H100 Tensor Core GPU</td> <td>9.0</td> <td>11.8</td> </tr> <tr> <td>NVIDIA H200 Tensor Core GPU</td> <td>9.0</td> <td>11.8</td> </tr> <tr> <td>NVIDIA B200 Tensor Core GPU</td> <td>10.0</td> <td>12.8</td> </tr> </tbody> </table></markdown-accessiblity-table> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Target Architecture</h2><a id="user-content-target-architecture" class="anchor" aria-label="Permalink: Target Architecture" href="#target-architecture"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">In general, PTX code generated for one target architecture can be run on future architectures (i.e., it is forward compatible). However, CUDA 12.0 introduced the concept of "architecture-accelerated features" whose PTX does not have forward compatibility guarantees. Several Hopper and Blackwell PTX instructions fall under this category of architecture-accelerated features, and thus require a <code>sm_90a</code> or <code>sm100a</code> target architecture (note the "a" appended). For more details on this and other architecture-accelerated instructions, please refer to the <a href="https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#feature-availability" rel="nofollow">CUDA Documentation</a>.</p> <p dir="auto">The target architecture information is passed on to CUTLASS via the cmake flag <code>CUTLASS_NVCC_ARCHS</code>. In order to maximize performance on Hopper GH100, users are required to build CUTLASS with <code>90a</code> as the target architecture. If a user accidentally builds a kernel which uses SM90a features (e.g. Hopper Tensor Core Instructions), using the SM90 target (note the lack of "a"), with either CUDA Toolkit 12 or 11.8, the kernel is expected to fail with a runtime error.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="cmake .. -DCUTLASS_NVCC_ARCHS=&quot;90a&quot;"><pre class="notranslate"><code>cmake .. -DCUTLASS_NVCC_ARCHS="90a" </code></pre></div> <p dir="auto">Or</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="cmake .. -DCUTLASS_NVCC_ARCHS=&quot;100a&quot; "><pre class="notranslate"><code>cmake .. -DCUTLASS_NVCC_ARCHS="100a" </code></pre></div> <p dir="auto">Note: The NVIDIA Blackwell SM100 architecture used in the datacenter products has a different compute capability than the one underpinning NVIDIA Blackwell GeForce RTX 50 series GPUs. As a result, kernels compiled for Blackwell SM100 architecture with arch conditional features (using <code>sm100a</code>) are not compatible with RTX 50 series GPUs.</p> <p dir="auto">Please refer to the <a href="/NVIDIA/cutlass/blob/main/media/docs/functionality.md">functionality documentation</a> for details on which kernels require which target architectures.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Documentation</h1><a id="user-content-documentation" class="anchor" aria-label="Permalink: Documentation" href="#documentation"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS is described in the following documents and the accompanying <a href="https://nvidia.github.io/cutlass" rel="nofollow">Doxygen documentation</a>.</p> <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md">Quick Start Guide</a> - basics of building and running CUTLASS</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/functionality.md">Functionality</a> - summarizes functionality available in CUTLASS</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/efficient_gemm.md">Efficient GEMM in CUDA</a> - describes how GEMM kernels may be implemented efficiently in CUDA</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/cutlass_3x_design.md">CUTLASS 3.x Design</a> - describes the CUTLASS 3.x design, its benefits, and how CuTe enables us to write much more composable components</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/gemm_api_3x.md">GEMM API 3.x</a> - describes the CUTLASS 3.x GEMM model and C++ template concepts</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/gemm_api.md">GEMM API 2.x</a> - describes the CUTLASS 2.x GEMM model and C++ template concepts</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/implicit_gemm_convolution.md">Implicit GEMM Convolution</a> - describes 2-D and 3-D convolution in CUTLASS</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/code_organization.md">Code Organization</a> - describes the organization and contents of the CUTLASS project</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/terminology.md">Terminology</a> - describes terms used in the code</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/programming_guidelines.md">Programming Guidelines</a> - guidelines for writing efficient modern CUDA C++</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/fundamental_types.md">Fundamental types</a> - describes basic C++ classes used in CUTLASS to represent numeric quantities and arrays</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/layout.md">Layouts</a> - describes layouts of matrices and tensors in memory</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/tile_iterator_concept.md">Tile Iterators</a> - describes C++ concepts for iterating over tiles of matrices in memory</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/profiler.md">CUTLASS Profiler</a> - command-line driven profiling application</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/utilities.md">CUTLASS Utilities</a> - additional templates used to facilitate rapid development</li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/dependent_kernel_launch.md">Dependent kernel launch</a> - describes a new feature in Hopper which allows overlapping dependent kernels in the same stream, and how it is used in CUTLASS.</li> </ul> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Resources</h1><a id="user-content-resources" class="anchor" aria-label="Permalink: Resources" href="#resources"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We have also described the structure of an efficient GEMM in our talk at the <a href="http://on-demand.gputechconf.com/gtc/2018/presentation/s8854-cutlass-software-primitives-for-dense-linear-algebra-at-all-levels-and-scales-within-cuda.pdf" rel="nofollow">GPU Technology Conference 2018</a>.</p> <ul dir="auto"> <li><a href="https://www.nvidia.com/en-us/on-demand/session/gtcsiliconvalley2018-s8854/" rel="nofollow">CUTLASS: Software Primitives for Dense Linear Algebra at All Levels and Scales within CUDA</a></li> <li><a href="https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21745/" rel="nofollow">Developing CUDA Kernels to Push Tensor Cores to the Absolute Limit on NVIDIA A100</a></li> <li><a href="https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s31883/" rel="nofollow">Accelerating Convolution with Tensor Cores in CUTLASS</a></li> <li><a href="https://www.nvidia.com/en-us/on-demand/session/gtcspring22-s41996/" rel="nofollow">Accelerating Backward Data Gradient by Increasing Tensor Core Utilization in CUTLASS</a></li> <li><a href="https://www.nvidia.com/en-us/on-demand/session/gtcfall22-a41131/" rel="nofollow">CUTLASS: Python API, Enhancements, and NVIDIA Hopper</a></li> </ul> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Building CUTLASS</h1><a id="user-content-building-cutlass" class="anchor" aria-label="Permalink: Building CUTLASS" href="#building-cutlass"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS is a header-only template library and does not need to be built to be used by other projects. Client applications should target CUTLASS's <code>include/</code> directory in their include paths.</p> <p dir="auto">CUTLASS unit tests, examples, and utilities can be build with CMake. The minimum version of CMake is given in the <a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md">Quickstart guide</a>. Make sure the <code>CUDACXX</code> environment variable points to NVCC in the CUDA Toolkit installed on your system.</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ export CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc"><pre>$ <span class="pl-k">export</span> CUDACXX=<span class="pl-smi">${CUDA_INSTALL_PATH}</span>/bin/nvcc</pre></div> <p dir="auto">Create a build directory within the CUTLASS project, then run CMake. By default CUTLASS will build kernels for CUDA architecture versions 5.0, 6.0, 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, and 9.0. To reduce compile time you can specify the architectures to build CUTLASS for by changing the CMake configuration setting <code>CUTLASS_NVCC_ARCHS</code>.</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ mkdir build &amp;&amp; cd build $ cmake .. -DCUTLASS_NVCC_ARCHS=80 # compiles for NVIDIA's Ampere Architecture"><pre>$ mkdir build <span class="pl-k">&amp;&amp;</span> <span class="pl-c1">cd</span> build $ cmake .. -DCUTLASS_NVCC_ARCHS=80 <span class="pl-c"><span class="pl-c">#</span> compiles for NVIDIA's Ampere Architecture</span></pre></div> <p dir="auto">From the <code>build/</code> directory, compile and run the CUTLASS unit tests by building the target <code>test_unit</code> with make.</p> <p dir="auto">The unit tests are organized as several binaries mirroring the top-level namespaces of CUTLASS, and they may be executed in parallel via make's <code>-j</code> command line argument.</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ make test_unit -j ... ... ... [----------] Global test environment tear-down [==========] 946 tests from 57 test cases ran. (10812 ms total) [ PASSED ] 946 tests."><pre>$ make test_unit -j ... ... ... [----------] Global <span class="pl-c1">test</span> environment tear-down [<span class="pl-k">==========</span>] 946 tests from 57 <span class="pl-c1">test</span> cases ran. (10812 ms total) [ PASSED ] 946 tests.</pre></div> <p dir="auto">All tests should pass on supported platforms, though the exact number of tests may vary over time.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Project Structure</h1><a id="user-content-project-structure" class="anchor" aria-label="Permalink: Project Structure" href="#project-structure"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS is arranged as a header-only library along with Utilities, Tools, Examples, and unit tests. <a href="https://nvidia.github.io/cutlass" rel="nofollow">Doxygen documentation</a> provides a complete list of files, classes, and template concepts defined in the CUTLASS project.</p> <p dir="auto">A detailed explanation of the source code organization may be found in the <a href="/NVIDIA/cutlass/blob/main/media/docs/code_organization.md">CUTLASS documentation</a>, but several main components are summarized below.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">CUTLASS Template Library</h2><a id="user-content-cutlass-template-library" class="anchor" aria-label="Permalink: CUTLASS Template Library" href="#cutlass-template-library"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="include/ # client applications should target this directory in their build's include paths cutlass/ # CUDA Templates for Linear Algebra Subroutines and Solvers - headers only arch/ # direct exposure of architecture features (including instruction-level GEMMs) conv/ # code specialized for convolution epilogue/ # code specialized for the epilogue of gemm/convolution gemm/ # code specialized for general matrix product computations layout/ # layout definitions for matrices, tensors, and other mathematical objects in memory platform/ # CUDA-capable Standard Library components reduction/ # bandwidth-limited reduction kernels that do not fit the &quot;gemm&quot; model thread/ # simt code that can be performed within a CUDA thread transform/ # code specialized for layout, type, and domain transformations * # core vocabulary types, containers, and basic numeric operations cute/ # CuTe Layout, layout algebra, MMA/Copy atoms, tiled MMA/Copy algorithm/ # Definitions of core operations such as copy, gemm, and operations on cute::tuples arch/ # Bare bones PTX wrapper structs for copy and math instructions atom/ # Meta-information either link to or built from arch/ operators mma_atom.hpp # cute::Mma_Atom and cute::TiledMma copy_atom.hpp # cute::Copy_Atom and cute::TiledCopy *sm*.hpp # Arch specific meta-information for copy and math operations * # Core library types such as Shape, Stride, Layout, Tensor, and associated operations "><pre class="notranslate"><code>include/ # client applications should target this directory in their build's include paths cutlass/ # CUDA Templates for Linear Algebra Subroutines and Solvers - headers only arch/ # direct exposure of architecture features (including instruction-level GEMMs) conv/ # code specialized for convolution epilogue/ # code specialized for the epilogue of gemm/convolution gemm/ # code specialized for general matrix product computations layout/ # layout definitions for matrices, tensors, and other mathematical objects in memory platform/ # CUDA-capable Standard Library components reduction/ # bandwidth-limited reduction kernels that do not fit the "gemm" model thread/ # simt code that can be performed within a CUDA thread transform/ # code specialized for layout, type, and domain transformations * # core vocabulary types, containers, and basic numeric operations cute/ # CuTe Layout, layout algebra, MMA/Copy atoms, tiled MMA/Copy algorithm/ # Definitions of core operations such as copy, gemm, and operations on cute::tuples arch/ # Bare bones PTX wrapper structs for copy and math instructions atom/ # Meta-information either link to or built from arch/ operators mma_atom.hpp # cute::Mma_Atom and cute::TiledMma copy_atom.hpp # cute::Copy_Atom and cute::TiledCopy *sm*.hpp # Arch specific meta-information for copy and math operations * # Core library types such as Shape, Stride, Layout, Tensor, and associated operations </code></pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">CUTLASS SDK Examples</h3><a id="user-content-cutlass-sdk-examples" class="anchor" aria-label="Permalink: CUTLASS SDK Examples" href="#cutlass-sdk-examples"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><a href="/NVIDIA/cutlass/blob/main/examples">CUTLASS SDK examples</a> apply CUTLASS templates to implement basic computations.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Tools</h3><a id="user-content-tools" class="anchor" aria-label="Permalink: Tools" href="#tools"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="tools/ library/ # CUTLASS Instance Library - contains instantiations of all supported CUTLASS templates include/ cutlass/ library/ profiler/ # CUTLASS Profiler - command-line utility for executing operations in the # CUTLASS Library util/ # CUTLASS Utilities - contains numerous helper classes for include/ # manging tensors in device memory, reference cutlass/ # implementations for GEMM, random initialization util/ # of tensors, and I/O."><pre class="notranslate"><code>tools/ library/ # CUTLASS Instance Library - contains instantiations of all supported CUTLASS templates include/ cutlass/ library/ profiler/ # CUTLASS Profiler - command-line utility for executing operations in the # CUTLASS Library util/ # CUTLASS Utilities - contains numerous helper classes for include/ # manging tensors in device memory, reference cutlass/ # implementations for GEMM, random initialization util/ # of tensors, and I/O. </code></pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Test</h3><a id="user-content-test" class="anchor" aria-label="Permalink: Test" href="#test"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The <code>test/unit/</code> directory consist of unit tests implemented with Google Test that demonstrate basic usage of Core API components and complete tests of the CUTLASS GEMM computations.</p> <p dir="auto">Instructions for building and running the Unit tests are described in the <a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md">Quickstart guide</a>.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Performance Profiling</h1><a id="user-content-performance-profiling" class="anchor" aria-label="Permalink: Performance Profiling" href="#performance-profiling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The <code>tools/profiler/</code> directory contains a command-line utility for launching each of the GEMM kernels. It can be built as follows:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ make cutlass_profiler -j16"><pre>$ make cutlass_profiler -j16</pre></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Building all GEMM and Convolution kernels (<em>long</em> build times)</h2><a id="user-content-building-all-gemm-and-convolution-kernels-long-build-times" class="anchor" aria-label="Permalink: Building all GEMM and Convolution kernels (long build times)" href="#building-all-gemm-and-convolution-kernels-long-build-times"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">By default, only one tile size is instantiated for each data type, math instruction, and layout. To instantiate all, set the following environment variable when running CMake from an empty <code>build/</code> directory. Beware, this results in <em>tens of thousands</em> of kernels and long build times. This would also result in a large binary size and on some platforms linker to fail on building the library. Therefore, it's highly recommended to generate only a subset of kernels as demonstrated in the sub-section below.</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ cmake .. -DCUTLASS_NVCC_ARCHS=90a -DCUTLASS_LIBRARY_KERNELS=all ... $ make cutlass_profiler -j16"><pre>$ cmake .. -DCUTLASS_NVCC_ARCHS=90a -DCUTLASS_LIBRARY_KERNELS=all ... $ make cutlass_profiler -j16</pre></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Building a subset of GEMM and Convolution kernels (<em>reduced</em> build times)</h2><a id="user-content-building-a-subset-of-gemm-and-convolution-kernels-reduced-build-times" class="anchor" aria-label="Permalink: Building a subset of GEMM and Convolution kernels (reduced build times)" href="#building-a-subset-of-gemm-and-convolution-kernels-reduced-build-times"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To compile strictly one kernel or a small set of kernels, a comma-delimited list of kernel names with wildcard characters may be used to reduce the set of kernels. The following examples show building exactly one or a subset of kernels for NVIDIA Ampere and Turing architecture:</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Building a subset Tensor Core GEMM kernels</h3><a id="user-content-building-a-subset-tensor-core-gemm-kernels" class="anchor" aria-label="Permalink: Building a subset Tensor Core GEMM kernels" href="#building-a-subset-tensor-core-gemm-kernels"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To compile a subset of Tensor Core GEMM kernels with FP32 accumulation and FP16 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*gemm_f16_*_nt_align8 ... $ make cutlass_profiler -j16"><pre>$ cmake .. -DCUTLASS_NVCC_ARCHS=<span class="pl-s"><span class="pl-pds">'</span>75;80<span class="pl-pds">'</span></span> -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s<span class="pl-k">*</span>gemm_f16_<span class="pl-k">*</span>_nt_align8 ... $ make cutlass_profiler -j16</pre></div> <p dir="auto">Example command line for profiling a subset of Tensor Core GEMM kernels is as follows:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*gemm_f16_*_nt_align8 --m=3456 --n=4096 --k=4096 ... ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s1688gemm_f16_256x128_32x2_nt_align8 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Passed Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=4096 --A=f16:column --B=f16:row --C=f32:column --alpha=1 \ --beta=0 --split_k_slices=1 --batch_count=1 --op_class=tensorop --accum=f32 --cta_m=256 --cta_n=128 \ --cta_k=32 --stages=2 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=8 --min_cc=75 \ --max_cc=1024 Bytes: 118489088 bytes FLOPs: 115992428544 flops Runtime: 1.55948 ms Memory: 70.7616 GiB/s Math: 74378.8 GFLOP/s ============================= ..."><pre>./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s<span class="pl-k">*</span>gemm_f16_<span class="pl-k">*</span>_nt_align8 --m=3456 --n=4096 --k=4096 ... ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_tensorop_s1688gemm_f16_256x128_32x2_nt_align8 Status: Success Verification: ON Disposition: Passed reference_device: Passed cuBLAS: Passed Arguments: --gemm_kind=universal --m=3456 --n=4096 --k=4096 --A=f16:column --B=f16:row --C=f32:column --alpha=1 \ --beta=0 --split_k_slices=1 --batch_count=1 --op_class=tensorop --accum=f32 --cta_m=256 --cta_n=128 \ --cta_k=32 --stages=2 --warps_m=4 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=8 --min_cc=75 \ --max_cc=1024 Bytes: 118489088 bytes FLOPs: 115992428544 flops Runtime: 1.55948 ms Memory: 70.7616 GiB/s Math: 74378.8 GFLOP/s ============================= ...</pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Building one CUDA Core GEMM kernel</h3><a id="user-content-building-one-cuda-core-gemm-kernel" class="anchor" aria-label="Permalink: Building one CUDA Core GEMM kernel" href="#building-one-cuda-core-gemm-kernel"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To compile one SGEMM kernel targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sgemm_128x128_8x2_nn_align1 ... $ make cutlass_profiler -j16"><pre>$ cmake .. -DCUTLASS_NVCC_ARCHS=<span class="pl-s"><span class="pl-pds">'</span>75;80<span class="pl-pds">'</span></span> -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sgemm_128x128_8x2_nn_align1 ... $ make cutlass_profiler -j16</pre></div> <p dir="auto">Example command line for profiling single SGEMM CUDA kernel is as follows:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ ./tools/profiler/cutlass_profiler --kernels=sgemm --m=3456 --n=4096 --k=4096 ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_simt_sgemm_128x128_8x2_nn_align1 Status: Success Verification: ON Disposition: Passed cuBLAS: Passed Arguments: --m=3456 --n=4096 --k=4096 --A=f32:column --B=f32:column --C=f32:column --alpha=1 --beta=0 --split_k_slices=1 \ --batch_count=1 --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \ --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024 Bytes: 180355072 bytes FLOPs: 115992428544 flops Runtime: 6.73655 ms Memory: 24.934 GiB/s Math: 17218.4 GFLOP/s ============================="><pre>$ ./tools/profiler/cutlass_profiler --kernels=sgemm --m=3456 --n=4096 --k=4096 ============================= Problem ID: 1 Provider: CUTLASS OperationKind: gemm Operation: cutlass_simt_sgemm_128x128_8x2_nn_align1 Status: Success Verification: ON Disposition: Passed cuBLAS: Passed Arguments: --m=3456 --n=4096 --k=4096 --A=f32:column --B=f32:column --C=f32:column --alpha=1 --beta=0 --split_k_slices=1 \ --batch_count=1 --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \ --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024 Bytes: 180355072 bytes FLOPs: 115992428544 flops Runtime: 6.73655 ms Memory: 24.934 GiB/s Math: 17218.4 GFLOP/s =============================</pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Building a subset of Tensor Core Convolution kernels</h3><a id="user-content-building-a-subset-of-tensor-core-convolution-kernels" class="anchor" aria-label="Permalink: Building a subset of Tensor Core Convolution kernels" href="#building-a-subset-of-tensor-core-convolution-kernels"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To compile a subset of Tensor core convolution kernels implementing forward propagation (fprop) with FP32 accumulation and FP16 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s*fprop_optimized_f16 ... $ make cutlass_profiler -j16"><pre>$ cmake .. -DCUTLASS_NVCC_ARCHS=<span class="pl-s"><span class="pl-pds">'</span>75;80<span class="pl-pds">'</span></span> -DCUTLASS_LIBRARY_KERNELS=cutlass_tensorop_s<span class="pl-k">*</span>fprop_optimized_f16 ... $ make cutlass_profiler -j16</pre></div> <p dir="auto">Example command line for profiling a subset of Tensor Core convolution kernels is as follows:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ ./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s*fprop_optimized_f16 --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 ... ============================= Problem ID: 1 Provider: CUTLASS OperationKind: conv2d Operation: cutlass_tensorop_s16816fprop_optimized_f16_128x128_32x5_nhwc Status: Success Verification: ON Disposition: Passed reference_device: Passed Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \ --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f16:nhwc --Filter=f16:nhwc --Output=f32:nhwc \ --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \ --eq_gemm_provider=none --op_class=tensorop --accum=f32 --cta_m=128 --cta_n=128 --cta_k=32 --stages=5 \ --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=16 --min_cc=80 --max_cc=1024 Bytes: 1130659840 bytes FLOPs: 118482796544 flops Runtime: 0.711496 ms Memory: 1479.99 GiB/s Math: 166526 GFLOP/s ============================= ..."><pre>$ ./tools/profiler/cutlass_profiler --kernels=cutlass_tensorop_s<span class="pl-k">*</span>fprop_optimized_f16 --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 ... ============================= Problem ID: 1 Provider: CUTLASS OperationKind: conv2d Operation: cutlass_tensorop_s16816fprop_optimized_f16_128x128_32x5_nhwc Status: Success Verification: ON Disposition: Passed reference_device: Passed Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \ --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f16:nhwc --Filter=f16:nhwc --Output=f32:nhwc \ --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \ --eq_gemm_provider=none --op_class=tensorop --accum=f32 --cta_m=128 --cta_n=128 --cta_k=32 --stages=5 \ --warps_m=2 --warps_n=2 --warps_k=1 --inst_m=16 --inst_n=8 --inst_k=16 --min_cc=80 --max_cc=1024 Bytes: 1130659840 bytes FLOPs: 118482796544 flops Runtime: 0.711496 ms Memory: 1479.99 GiB/s Math: 166526 GFLOP/s ============================= ...</pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Building one Convolution CUDA kernel</h3><a id="user-content-building-one-convolution-cuda-kernel" class="anchor" aria-label="Permalink: Building one Convolution CUDA kernel" href="#building-one-convolution-cuda-kernel"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To compile and run one CUDA Core convolution kernel implementing forward propagation (fprop) with F32 accumulation and FP32 input targeting NVIDIA Ampere and Turing architecture, use the below cmake command line:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ cmake .. -DCUTLASS_NVCC_ARCHS='75;80' -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc ... $ make cutlass_profiler -j16"><pre>$ cmake .. -DCUTLASS_NVCC_ARCHS=<span class="pl-s"><span class="pl-pds">'</span>75;80<span class="pl-pds">'</span></span> -DCUTLASS_LIBRARY_KERNELS=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc ... $ make cutlass_profiler -j16</pre></div> <p dir="auto">Example command line for profiling one CUDA Core convolution kernel:</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ ./tools/profiler/cutlass_profiler --kernels=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 ============================= Problem ID: 1 Provider: CUTLASS OperationKind: conv2d Operation: cutlass_simt_sfprop_optimized_128x128_8x2_nhwc Status: Success Verification: ON Disposition: Passed reference_device: Passed Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \ --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f32:nhwc --Filter=f32:nhwc --Output=f32:nhwc \ --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \ --eq_gemm_provider=none --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \ --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024 Bytes: 2055798784 bytes FLOPs: 118482796544 flops Runtime: 7.34266 ms Memory: 260.752 GiB/s Math: 16136.2 GFLOP/s ============================= "><pre>$ ./tools/profiler/cutlass_profiler --kernels=cutlass_simt_sfprop_optimized_128x128_8x2_nhwc --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 ============================= Problem ID: 1 Provider: CUTLASS OperationKind: conv2d Operation: cutlass_simt_sfprop_optimized_128x128_8x2_nhwc Status: Success Verification: ON Disposition: Passed reference_device: Passed Arguments: --conv_kind=fprop --n=8 --h=224 --w=224 --c=128 --k=128 --r=3 --s=3 --p=224 --q=224 --pad_h=1 --pad_w=1 \ --stride_h=1 --stride_w=1 --dilation_h=1 --dilation_w=1 --Activation=f32:nhwc --Filter=f32:nhwc --Output=f32:nhwc \ --conv_mode=cross --iterator_algorithm=optimized --alpha=1 --beta=0 --split_k_mode=serial --split_k_slices=1 \ --eq_gemm_provider=none --op_class=simt --accum=f32 --cta_m=128 --cta_n=128 --cta_k=8 --stages=2 --warps_m=4 \ --warps_n=2 --warps_k=1 --inst_m=1 --inst_n=1 --inst_k=1 --min_cc=50 --max_cc=1024 Bytes: 2055798784 bytes FLOPs: 118482796544 flops Runtime: 7.34266 ms Memory: 260.752 GiB/s Math: 16136.2 GFLOP/s ============================= </pre></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">More Details on Compiling CUTLASS Kernels and CUTLASS Profiler</h2><a id="user-content-more-details-on-compiling-cutlass-kernels-and-cutlass-profiler" class="anchor" aria-label="Permalink: More Details on Compiling CUTLASS Kernels and CUTLASS Profiler" href="#more-details-on-compiling-cutlass-kernels-and-cutlass-profiler"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Please follow the links for more CMake examples on selectively compiling CUTLASS kernels: <ul dir="auto"> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#gemm-cmake-examples">GEMM CMake Examples</a></li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/quickstart.md#convolution-cmake-examples">Implicit GEMM convolution CMake Examples</a></li> </ul> </li> <li><a href="/NVIDIA/cutlass/blob/main/media/docs/profiler.md">Further details about the CUTLASS Profiler are described here.</a></li> </ul> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">About</h1><a id="user-content-about" class="anchor" aria-label="Permalink: About" href="#about"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">CUTLASS is released by NVIDIA Corporation as Open Source software under the <a href="/NVIDIA/cutlass/blob/main/LICENSE.txt">3-clause "New" BSD license</a>.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Contributors</h1><a id="user-content-contributors" class="anchor" aria-label="Permalink: Contributors" href="#contributors"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The official list of CUTLASS developers and contributors is available here: <a href="/NVIDIA/cutlass/blob/main/CONTRIBUTORS.md">CONTRIBUTORS</a>.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Copyright</h1><a id="user-content-copyright" class="anchor" aria-label="Permalink: Copyright" href="#copyright"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Copyright (c) 2017 - 2025 NVIDIA CORPORATION &amp; AFFILIATES. All rights reserved. SPDX-License-Identifier: BSD-3-Clause</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content=" Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS &quot;AS IS&quot; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."><pre class="notranslate"><code> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. </code></pre></div> </article></div></div></div></div></div> <!-- --> <!-- --> <script type="application/json" id="__PRIMER_DATA_:R0:__">{"resolvedServerColorMode":"day"}</script></div> </react-partial> <input type="hidden" data-csrf="true" value="cEIoBwB/oYlkF9G026MyWIIzXKPTPfqyiyArPuWKhp/hRNRrsV8MojLMJAuGyRCuMe0dPlGI5bBRYTQLFH0Usw==" /> </div> <div data-view-component="true" class="Layout-sidebar"> <div class="BorderGrid about-margin" data-pjax> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <div class="hide-sm hide-md"> <h2 class="mb-3 h4">About</h2> <p class="f4 my-3"> CUDA Templates for Linear Algebra Subroutines </p> <h3 class="sr-only">Topics</h3> <div class="my-3"> <div class="f6"> <a href="/topics/deep-learning" title="Topic: deep-learning" data-view-component="true" class="topic-tag topic-tag-link"> deep-learning </a> <a href="/topics/cpp" title="Topic: cpp" data-view-component="true" class="topic-tag topic-tag-link"> cpp </a> <a href="/topics/gpu" title="Topic: gpu" data-view-component="true" class="topic-tag topic-tag-link"> gpu </a> <a href="/topics/cuda" title="Topic: cuda" data-view-component="true" class="topic-tag topic-tag-link"> cuda </a> <a href="/topics/nvidia" title="Topic: nvidia" data-view-component="true" class="topic-tag topic-tag-link"> nvidia </a> <a href="/topics/deep-learning-library" title="Topic: deep-learning-library" data-view-component="true" class="topic-tag topic-tag-link"> deep-learning-library </a> </div> </div> <h3 class="sr-only">Resources</h3> <div class="mt-2"> <a class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:readme&quot;}" href="#readme-ov-file"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book mr-2"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> Readme </a> </div> <h3 class="sr-only">License</h3> <div class="mt-2"> <a href="#License-1-ov-file" class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:license&quot;}" > <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-law mr-2"> <path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path> </svg> View license </a> </div> <include-fragment src="/NVIDIA/cutlass/hovercards/citation/sidebar_partial?tree_name=main"> </include-fragment> <div class="mt-2"> <a href="/NVIDIA/cutlass/activity" data-view-component="true" class="Link Link--muted"><svg text="gray" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pulse mr-2"> <path d="M6 2c.306 0 .582.187.696.471L10 10.731l1.304-3.26A.751.751 0 0 1 12 7h3.25a.75.75 0 0 1 0 1.5h-2.742l-1.812 4.528a.751.751 0 0 1-1.392 0L6 4.77 4.696 8.03A.75.75 0 0 1 4 8.5H.75a.75.75 0 0 1 0-1.5h2.742l1.812-4.529A.751.751 0 0 1 6 2Z"></path> </svg> <span class="color-fg-muted">Activity</span></a> </div> <div class="mt-2"> <a href="/NVIDIA/cutlass/custom-properties" data-view-component="true" class="Link Link--muted"><svg text="gray" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-note mr-2"> <path d="M0 3.75C0 2.784.784 2 1.75 2h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 14H1.75A1.75 1.75 0 0 1 0 12.25Zm1.75-.25a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-8.5a.25.25 0 0 0-.25-.25ZM3.5 6.25a.75.75 0 0 1 .75-.75h7a.75.75 0 0 1 0 1.5h-7a.75.75 0 0 1-.75-.75Zm.75 2.25h4a.75.75 0 0 1 0 1.5h-4a.75.75 0 0 1 0-1.5Z"></path> </svg> <span class="color-fg-muted">Custom properties</span></a> </div> <h3 class="sr-only">Stars</h3> <div class="mt-2"> <a href="/NVIDIA/cutlass/stargazers" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg> <strong>6.2k</strong> stars</a> </div> <h3 class="sr-only">Watchers</h3> <div class="mt-2"> <a href="/NVIDIA/cutlass/watchers" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-eye mr-2"> <path d="M8 2c1.981 0 3.671.992 4.933 2.078 1.27 1.091 2.187 2.345 2.637 3.023a1.62 1.62 0 0 1 0 1.798c-.45.678-1.367 1.932-2.637 3.023C11.67 13.008 9.981 14 8 14c-1.981 0-3.671-.992-4.933-2.078C1.797 10.83.88 9.576.43 8.898a1.62 1.62 0 0 1 0-1.798c.45-.677 1.367-1.931 2.637-3.022C4.33 2.992 6.019 2 8 2ZM1.679 7.932a.12.12 0 0 0 0 .136c.411.622 1.241 1.75 2.366 2.717C5.176 11.758 6.527 12.5 8 12.5c1.473 0 2.825-.742 3.955-1.715 1.124-.967 1.954-2.096 2.366-2.717a.12.12 0 0 0 0-.136c-.412-.621-1.242-1.75-2.366-2.717C10.824 4.242 9.473 3.5 8 3.5c-1.473 0-2.825.742-3.955 1.715-1.124.967-1.954 2.096-2.366 2.717ZM8 10a2 2 0 1 1-.001-3.999A2 2 0 0 1 8 10Z"></path> </svg> <strong>111</strong> watching</a> </div> <h3 class="sr-only">Forks</h3> <div class="mt-2"> <a href="/NVIDIA/cutlass/forks" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-2"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg> <strong>1.1k</strong> forks</a> </div> <div class="mt-2"> <a class="Link--muted" href="/contact/report-content?content_url=https%3A%2F%2Fgithub.com%2FNVIDIA%2Fcutlass&amp;report=NVIDIA+%28user%29"> Report repository </a> </div> </div> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame"> <a href="/NVIDIA/cutlass/releases" data-view-component="true" class="Link--primary no-underline Link">Releases <span title="35" data-view-component="true" class="Counter">35</span></a></h2> <a class="Link--primary d-flex no-underline" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" href="/NVIDIA/cutlass/releases/tag/v3.7.0"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-tag flex-shrink-0 mt-1 color-fg-success"> <path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path> </svg> <div class="ml-2 min-width-0"> <div class="d-flex"> <span class="css-truncate css-truncate-target text-bold mr-2" style="max-width: none;">CUTLASS 3.7.0</span> <span title="Label: Latest" data-view-component="true" class="Label Label--success flex-shrink-0"> Latest </span> </div> <div class="text-small color-fg-muted"><relative-time datetime="2025-01-18T15:07:29Z" class="no-wrap">Jan 18, 2025</relative-time></div> </div> </a> <div data-view-component="true" class="mt-3"> <a text="small" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" href="/NVIDIA/cutlass/releases" data-view-component="true" class="Link">+ 34 releases</a></div> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3"> <a href="/orgs/NVIDIA/packages?repo_name=cutlass" data-view-component="true" class="Link--primary no-underline Link d-flex flex-items-center">Packages <span title="0" hidden="hidden" data-view-component="true" class="Counter ml-1">0</span></a></h2> <div class="text-small color-fg-muted" > No packages published <br> </div> </div> </div> <div class="BorderGrid-row" hidden> <div class="BorderGrid-cell"> <include-fragment src="/NVIDIA/cutlass/used_by_list" accept="text/fragment+html"> </include-fragment> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3"> <a href="/NVIDIA/cutlass/graphs/contributors" data-view-component="true" class="Link--primary no-underline Link d-flex flex-items-center">Contributors <span title="164" data-view-component="true" class="Counter ml-1">164</span></a></h2> <ul class="list-style-none d-flex flex-wrap mb-n2"> <li class="mb-2 mr-2" > <a href="https://github.com/hwu36" class="" data-hovercard-type="user" data-hovercard-url="/users/hwu36/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/57973641?s=64&amp;v=4" alt="@hwu36" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/kerrmudgeon" class="" data-hovercard-type="user" data-hovercard-url="/users/kerrmudgeon/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/11259?s=64&amp;v=4" alt="@kerrmudgeon" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/ANIKET-SHIVAM" class="" data-hovercard-type="user" data-hovercard-url="/users/ANIKET-SHIVAM/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/3268307?s=64&amp;v=4" alt="@ANIKET-SHIVAM" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/dumerrill" class="" data-hovercard-type="user" data-hovercard-url="/users/dumerrill/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/1476032?s=64&amp;v=4" alt="@dumerrill" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/jackkosaian" class="" data-hovercard-type="user" data-hovercard-url="/users/jackkosaian/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/5831848?s=64&amp;v=4" alt="@jackkosaian" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/reed-lau" class="" data-hovercard-type="user" data-hovercard-url="/users/reed-lau/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/26267436?s=64&amp;v=4" alt="@reed-lau" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/Peter9606" class="" data-hovercard-type="user" data-hovercard-url="/users/Peter9606/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/2186779?s=64&amp;v=4" alt="@Peter9606" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/yzhaiustc" class="" data-hovercard-type="user" data-hovercard-url="/users/yzhaiustc/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/43502317?s=64&amp;v=4" alt="@yzhaiustc" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/manishucsd" class="" data-hovercard-type="user" data-hovercard-url="/users/manishucsd/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/3487276?s=64&amp;v=4" alt="@manishucsd" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/alihassanijr" class="" data-hovercard-type="user" data-hovercard-url="/users/alihassanijr/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/68103095?s=64&amp;v=4" alt="@alihassanijr" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/thakkarV" class="" data-hovercard-type="user" data-hovercard-url="/users/thakkarV/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/23328942?s=64&amp;v=4" alt="@thakkarV" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/danthe3rd" class="" data-hovercard-type="user" data-hovercard-url="/users/danthe3rd/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/43445237?s=64&amp;v=4" alt="@danthe3rd" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/mnicely" class="" data-hovercard-type="user" data-hovercard-url="/users/mnicely/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/50021634?s=64&amp;v=4" alt="@mnicely" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/Gregory-Meyer" class="" data-hovercard-type="user" data-hovercard-url="/users/Gregory-Meyer/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/23343587?s=64&amp;v=4" alt="@Gregory-Meyer" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> </ul> <div data-view-component="true" class="mt-3"> <a text="small" href="/NVIDIA/cutlass/graphs/contributors" data-view-component="true" class="Link--inTextBlock Link">+ 150 contributors</a></div> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3">Languages</h2> <div class="mb-2"> <span data-view-component="true" class="Progress"> <span style="background-color:#f34b7d !important;;width: 67.1%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#3A4E3A !important;;width: 25.8%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#3572A5 !important;;width: 4.2%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#e34c26 !important;;width: 2.2%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#DA3434 !important;;width: 0.6%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#555555 !important;;width: 0.1%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> </span></div> <ul class="list-style-none"> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=c%2B%2B" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#f34b7d;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">C++</span> <span>67.1%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=cuda" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#3A4E3A;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Cuda</span> <span>25.8%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=python" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#3572A5;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Python</span> <span>4.2%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=html" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#e34c26;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">HTML</span> <span>2.2%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=cmake" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#DA3434;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">CMake</span> <span>0.6%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/NVIDIA/cutlass/search?l=c" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#555555;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">C</span> <span>0.1%</span> </a> </li> </ul> </div> </div> </div> </div> </div></div> </div> </div> </turbo-frame> </main> </div> </div> <footer class="footer pt-8 pb-6 f6 color-fg-muted p-responsive" role="contentinfo" > <h2 class='sr-only'>Footer</h2> <div class="d-flex flex-justify-center flex-items-center flex-column-reverse flex-lg-row flex-wrap flex-lg-nowrap"> <div class="d-flex flex-items-center flex-shrink-0 mx-2"> <a aria-label="Homepage" title="GitHub" class="footer-octicon mr-2" href="https://github.com"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-mark-github"> <path d="M12.5.75C6.146.75 1 5.896 1 12.25c0 5.089 3.292 9.387 7.863 10.91.575.101.79-.244.79-.546 0-.273-.014-1.178-.014-2.142-2.889.532-3.636-.704-3.866-1.35-.13-.331-.69-1.352-1.18-1.625-.402-.216-.977-.748-.014-.762.906-.014 1.553.834 1.769 1.179 1.035 1.74 2.688 1.25 3.349.948.1-.747.402-1.25.733-1.538-2.559-.287-5.232-1.279-5.232-5.678 0-1.25.445-2.285 1.178-3.09-.115-.288-.517-1.467.115-3.048 0 0 .963-.302 3.163 1.179.92-.259 1.897-.388 2.875-.388.977 0 1.955.13 2.875.388 2.2-1.495 3.162-1.179 3.162-1.179.633 1.581.23 2.76.115 3.048.733.805 1.179 1.825 1.179 3.09 0 4.413-2.688 5.39-5.247 5.678.417.36.776 1.05.776 2.128 0 1.538-.014 2.774-.014 3.162 0 .302.216.662.79.547C20.709 21.637 24 17.324 24 12.25 24 5.896 18.854.75 12.5.75Z"></path> </svg> </a> <span> &copy; 2025 GitHub,&nbsp;Inc. </span> </div> <nav aria-label="Footer"> <h3 class="sr-only" id="sr-footer-heading">Footer navigation</h3> <ul class="list-style-none d-flex flex-justify-center flex-wrap mb-2 mb-lg-0" aria-labelledby="sr-footer-heading"> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to Terms&quot;,&quot;label&quot;:&quot;text:terms&quot;}" href="https://docs.github.com/site-policy/github-terms/github-terms-of-service" data-view-component="true" class="Link--secondary Link">Terms</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to privacy&quot;,&quot;label&quot;:&quot;text:privacy&quot;}" href="https://docs.github.com/site-policy/privacy-policies/github-privacy-statement" data-view-component="true" class="Link--secondary Link">Privacy</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to security&quot;,&quot;label&quot;:&quot;text:security&quot;}" href="https://github.com/security" data-view-component="true" class="Link--secondary Link">Security</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to status&quot;,&quot;label&quot;:&quot;text:status&quot;}" href="https://www.githubstatus.com/" data-view-component="true" class="Link--secondary Link">Status</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to docs&quot;,&quot;label&quot;:&quot;text:docs&quot;}" href="https://docs.github.com/" data-view-component="true" class="Link--secondary Link">Docs</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to contact&quot;,&quot;label&quot;:&quot;text:contact&quot;}" href="https://support.github.com?tags=dotcom-footer" data-view-component="true" class="Link--secondary Link">Contact</a> </li> <li class="mx-2" > <cookie-consent-link> <button type="button" class="Link--secondary underline-on-hover border-0 p-0 color-bg-transparent" data-action="click:cookie-consent-link#showConsentManagement" data-analytics-event="{&quot;location&quot;:&quot;footer&quot;,&quot;action&quot;:&quot;cookies&quot;,&quot;context&quot;:&quot;subfooter&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;cookies_link_subfooter_footer&quot;}" > Manage cookies </button> </cookie-consent-link> </li> <li class="mx-2"> <cookie-consent-link> <button type="button" class="Link--secondary underline-on-hover border-0 p-0 color-bg-transparent" data-action="click:cookie-consent-link#showConsentManagement" data-analytics-event="{&quot;location&quot;:&quot;footer&quot;,&quot;action&quot;:&quot;dont_share_info&quot;,&quot;context&quot;:&quot;subfooter&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;dont_share_info_link_subfooter_footer&quot;}" > Do not share my personal information </button> </cookie-consent-link> </li> </ul> </nav> </div> </footer> <ghcc-consent id="ghcc" class="position-fixed bottom-0 left-0" style="z-index: 999999" data-initial-cookie-consent-allowed="" data-cookie-consent-required="false"></ghcc-consent> <div id="ajax-error-message" class="ajax-error-message flash flash-error" hidden> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-alert"> <path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575Zm1.763.707a.25.25 0 0 0-.44 0L1.698 13.132a.25.25 0 0 0 .22.368h12.164a.25.25 0 0 0 .22-.368Zm.53 3.996v2.5a.75.75 0 0 1-1.5 0v-2.5a.75.75 0 0 1 1.5 0ZM9 11a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <button type="button" class="flash-close js-ajax-error-dismiss" aria-label="Dismiss error"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> You can’t perform that action at this time. </div> <template id="site-details-dialog"> <details class="details-reset details-overlay details-overlay-dark lh-default color-fg-default hx_rsm" open> <summary role="button" aria-label="Close dialog"></summary> <details-dialog class="Box Box--overlay d-flex flex-column anim-fade-in fast hx_rsm-dialog hx_rsm-modal"> <button class="Box-btn-octicon m-0 btn-octicon position-absolute right-0 top-0" type="button" aria-label="Close dialog" data-close-dialog> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> <div class="octocat-spinner my-6 js-details-dialog-spinner"></div> </details-dialog> </details> </template> <div class="Popover js-hovercard-content position-absolute" style="display: none; outline: none;"> <div class="Popover-message Popover-message--bottom-left Popover-message--large Box color-shadow-large" style="width:360px;"> </div> </div> <template id="snippet-clipboard-copy-button"> <div class="zeroclipboard-container position-absolute right-0 top-0"> <clipboard-copy aria-label="Copy" class="ClipboardButton btn js-clipboard-copy m-2 p-0" data-copy-feedback="Copied!" data-tooltip-direction="w"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copy js-clipboard-copy-icon m-2"> <path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"></path><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-check js-clipboard-check-icon color-fg-success d-none m-2"> <path d="M13.78 4.22a.75.75 0 0 1 0 1.06l-7.25 7.25a.75.75 0 0 1-1.06 0L2.22 9.28a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L6 10.94l6.72-6.72a.75.75 0 0 1 1.06 0Z"></path> </svg> </clipboard-copy> </div> </template> <template id="snippet-clipboard-copy-button-unpositioned"> <div class="zeroclipboard-container"> <clipboard-copy aria-label="Copy" class="ClipboardButton btn btn-invisible js-clipboard-copy m-2 p-0 d-flex flex-justify-center flex-items-center" data-copy-feedback="Copied!" data-tooltip-direction="w"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copy js-clipboard-copy-icon"> <path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"></path><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-check js-clipboard-check-icon color-fg-success d-none"> <path d="M13.78 4.22a.75.75 0 0 1 0 1.06l-7.25 7.25a.75.75 0 0 1-1.06 0L2.22 9.28a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L6 10.94l6.72-6.72a.75.75 0 0 1 1.06 0Z"></path> </svg> </clipboard-copy> </div> </template> </div> <div id="js-global-screen-reader-notice" class="sr-only mt-n1" aria-live="polite" aria-atomic="true" ></div> <div id="js-global-screen-reader-notice-assertive" class="sr-only mt-n1" aria-live="assertive" aria-atomic="true"></div> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10