GitHub - EleutherAI/gpt-neox: An implementation of model parallel autoregressive transformers on GPUs, based on the Megatron and DeepSpeed libraries

<svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-shield-check color-fg-subtle mr-3"> <path d="M16.53 9.78a.75.75 0 0 0-1.06-1.06L11 13.19l-1.97-1.97a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l5-5Z"></path><path d="m12.54.637 8.25 2.675A1.75 1.75 0 0 1 22 4.976V10c0 6.19-3.771 10.704-9.401 12.83a1.704 1.704 0 0 1-1.198 0C5.77 20.705 2 16.19 2 10V4.976c0-.758.489-1.43 1.21-1.664L11.46.637a1.748 1.748 0 0 1 1.08 0Zm-.617 1.426-8.25 2.676a.249.249 0 0 0-.173.237V10c0 5.46 3.28 9.483 8.43 11.426a.199.199 0 0 0 .14 0C17.22 19.483 20.5 15.461 20.5 10V4.976a.25.25 0 0 0-.173-.237l-8.25-2.676a.253.253 0 0 0-.154 0Z"></path> </svg> <div> <div class="color-fg-default h4">Advanced Security</div> Enterprise-grade security features </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;copilot_for_business&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;copilot_for_business_link_enterprise_navbar&quot;}" href="/features/copilot/copilot-business"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-copilot color-fg-subtle mr-3"> <path d="M23.922 16.992c-.861 1.495-5.859 5.023-11.922 5.023-6.063 0-11.061-3.528-11.922-5.023A.641.641 0 0 1 0 16.736v-2.869a.841.841 0 0 1 .053-.22c.372-.935 1.347-2.292 2.605-2.656.167-.429.414-1.055.644-1.517a10.195 10.195 0 0 1-.052-1.086c0-1.331.282-2.499 1.132-3.368.397-.406.89-.717 1.474-.952 1.399-1.136 3.392-2.093 6.122-2.093 2.731 0 4.767.957 6.166 2.093.584.235 1.077.546 1.474.952.85.869 1.132 2.037 1.132 3.368 0 .368-.014.733-.052 1.086.23.462.477 1.088.644 1.517 1.258.364 2.233 1.721 2.605 2.656a.832.832 0 0 1 .053.22v2.869a.641.641 0 0 1-.078.256ZM12.172 11h-.344a4.323 4.323 0 0 1-.355.508C10.703 12.455 9.555 13 7.965 13c-1.725 0-2.989-.359-3.782-1.259a2.005 2.005 0 0 1-.085-.104L4 11.741v6.585c1.435.779 4.514 2.179 8 2.179 3.486 0 6.565-1.4 8-2.179v-6.585l-.098-.104s-.033.045-.085.104c-.793.9-2.057 1.259-3.782 1.259-1.59 0-2.738-.545-3.508-1.492a4.323 4.323 0 0 1-.355-.508h-.016.016Zm.641-2.935c.136 1.057.403 1.913.878 2.497.442.544 1.134.938 2.344.938 1.573 0 2.292-.337 2.657-.751.384-.435.558-1.15.558-2.361 0-1.14-.243-1.847-.705-2.319-.477-.488-1.319-.862-2.824-1.025-1.487-.161-2.192.138-2.533.529-.269.307-.437.808-.438 1.578v.021c0 .265.021.562.063.893Zm-1.626 0c.042-.331.063-.628.063-.894v-.02c-.001-.77-.169-1.271-.438-1.578-.341-.391-1.046-.69-2.533-.529-1.505.163-2.347.537-2.824 1.025-.462.472-.705 1.179-.705 2.319 0 1.211.175 1.926.558 2.361.365.414 1.084.751 2.657.751 1.21 0 1.902-.394 2.344-.938.475-.584.742-1.44.878-2.497Z"></path><path d="M14.5 14.25a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Zm-5 0a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Z"></path> </svg> <div> <div class="color-fg-default h4">Copilot for business</div> Enterprise-grade AI features </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;premium_support&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;premium_support_link_enterprise_navbar&quot;}" href="/premium-support"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-comment-discussion color-fg-subtle mr-3"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"></path><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z"></path> </svg> <div> <div class="color-fg-default h4">Premium Support</div> Enterprise-grade 24/7 support </div> </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <a class="HeaderMenu-link no-underline px-0 px-lg-2 py-3 py-lg-2 d-block d-lg-inline-block" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;pricing&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;pricing_link_global_navbar&quot;}" href="https://github.com/pricing">Pricing</a> </li> </ul> </nav> <div class="d-flex flex-column flex-lg-row width-full flex-justify-end flex-lg-items-center text-center mt-3 mt-lg-0 text-lg-left ml-lg-3"> <qbsearch-input class="search-input" data-scope="repo:EleutherAI/gpt-neox" data-custom-scopes-path="/search/custom_scopes" data-delete-custom-scopes-csrf="cSOF4dEy1UCGxdoSeJ_xunmx95BjQdtpdVha5GzYP3-Kt0jh75uNnT2YFJQKzvXQgmB3ENlPJlWEPQDnKBXXgA" data-max-custom-scopes="10" data-header-redesign-enabled="false" data-initial-value="" data-blackbird-suggestions-path="/search/suggestions" data-jump-to-suggestions-path="/_graphql/GetSuggestedNavigationDestinations" data-current-repository="EleutherAI/gpt-neox" data-current-org="EleutherAI" data-current-owner="" data-logged-in="false" data-copilot-chat-enabled="false" data-nl-search-enabled="false" data-retain-scroll-position="true"> <div class="search-input-container search-with-dialog position-relative d-flex flex-row flex-items-center mr-4 rounded" data-action="click:qbsearch-input#searchInputContainerClicked" > <button type="button" class="header-search-button placeholder input-button form-control d-flex flex-1 flex-self-stretch flex-items-center no-wrap width-full py-0 pl-2 pr-0 text-left border-0 box-shadow-none" data-target="qbsearch-input.inputButton" aria-label="Search or jump to…" aria-haspopup="dialog" placeholder="Search or jump to..." 
data-hotkey=s,/ autocapitalize="off" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;searchbar&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;input&quot;,&quot;label&quot;:&quot;searchbar_input_global_navbar&quot;}" data-action="click:qbsearch-input#handleExpand" > <div class="mr-2 color-fg-muted"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </div> <span class="flex-1" data-target="qbsearch-input.inputButtonText">Search or jump to...</span> <div class="d-flex" data-target="qbsearch-input.hotkeyIndicator"> <svg xmlns="http://www.w3.org/2000/svg" width="22" height="20" aria-hidden="true" class="mr-1"><path fill="none" stroke="#979A9C" opacity=".4" d="M3.5.5h12c1.7 0 3 1.3 3 3v13c0 1.7-1.3 3-3 3h-12c-1.7 0-3-1.3-3-3v-13c0-1.7 1.3-3 3-3z"></path><path fill="#979A9C" d="M11.8 6L8 15.1h-.9L10.8 6h1z"></path></svg> </div> </button> <input type="hidden" name="type" class="js-site-search-type-field"> <div class="Overlay--hidden " data-modal-dialog-overlay> <modal-dialog data-action="close:qbsearch-input#handleClose cancel:qbsearch-input#handleClose" data-target="qbsearch-input.searchSuggestionsDialog" role="dialog" id="search-suggestions-dialog" aria-modal="true" aria-labelledby="search-suggestions-dialog-header" data-view-component="true" class="Overlay Overlay--width-large Overlay--height-auto"> <h1 id="search-suggestions-dialog-header" class="sr-only">Search code, repositories, users, issues, pull requests...</h1> <div class="Overlay-body Overlay-body--paddingNone"> <div data-view-component="true"> <div class="search-suggestions position-fixed width-full color-shadow-large border color-fg-default color-bg-default overflow-hidden d-flex flex-column query-builder-container" style="border-radius: 12px;" data-target="qbsearch-input.queryBuilderContainer" hidden > <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="query-builder-test-form" action="" accept-charset="UTF-8" method="get"> <query-builder data-target="qbsearch-input.queryBuilder" id="query-builder-query-builder-test" data-filter-key=":" data-view-component="true" class="QueryBuilder search-query-builder"> <div class="FormControl FormControl--fullWidth"> <label id="query-builder-test-label" for="query-builder-test" class="FormControl-label sr-only"> Search </label> <div class="QueryBuilder-StyledInput width-fit " data-target="query-builder.styledInput" > <span id="query-builder-test-leadingvisual-wrap" class="FormControl-input-leadingVisualWrap QueryBuilder-leadingVisualWrap"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search FormControl-input-leadingVisual"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </span> <div data-target="query-builder.styledInputContainer" class="QueryBuilder-StyledInputContainer"> <div aria-hidden="true" class="QueryBuilder-StyledInputContent" data-target="query-builder.styledInputContent" ></div> <div class="QueryBuilder-InputWrapper"> <div aria-hidden="true" class="QueryBuilder-Sizer" 
data-target="query-builder.sizer"></div> <input id="query-builder-test" name="query-builder-test" value="" autocomplete="off" type="text" role="combobox" spellcheck="false" aria-expanded="false" aria-describedby="validation-f389797f-19c6-4424-b886-a86542fec86b" data-target="query-builder.input" data-action=" input:query-builder#inputChange blur:query-builder#inputBlur keydown:query-builder#inputKeydown focus:query-builder#inputFocus " data-view-component="true" class="FormControl-input QueryBuilder-Input FormControl-medium" /> </div> </div> <span class="sr-only" id="query-builder-test-clear">Clear</span> <button role="button" id="query-builder-test-clear-button" aria-labelledby="query-builder-test-clear query-builder-test-label" data-target="query-builder.clearButton" data-action=" click:query-builder#clear focus:query-builder#clearButtonFocus blur:query-builder#clearButtonBlur " variant="small" hidden="hidden" type="button" data-view-component="true" class="Button Button--iconOnly Button--invisible Button--medium mr-1 px-2 py-0 d-flex flex-items-center rounded-1 color-fg-muted"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x-circle-fill Button-visual"> <path d="M2.343 13.657A8 8 0 1 1 13.658 2.343 8 8 0 0 1 2.343 13.657ZM6.03 4.97a.751.751 0 0 0-1.042.018.751.751 0 0 0-.018 1.042L6.94 8 4.97 9.97a.749.749 0 0 0 .326 1.275.749.749 0 0 0 .734-.215L8 9.06l1.97 1.97a.749.749 0 0 0 1.275-.326.749.749 0 0 0-.215-.734L9.06 8l1.97-1.97a.749.749 0 0 0-.326-1.275.749.749 0 0 0-.734.215L8 6.94Z"></path> </svg> </button> </div> <template id="search-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search"> <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path> </svg> </template> <template id="code-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="file-code-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-file-code"> <path d="M4 1.75C4 .784 4.784 0 5.75 0h5.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v8.586A1.75 1.75 0 0 1 14.25 15h-9a.75.75 0 0 1 0-1.5h9a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 10 4.25V1.5H5.75a.25.25 0 0 0-.25.25v2.5a.75.75 0 0 1-1.5 0Zm1.72 4.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734l1.47-1.47-1.47-1.47a.75.75 0 0 1 0-1.06ZM3.28 7.78 1.81 9.25l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Zm8.22-6.218V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path> </svg> </template> <template id="history-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon 
octicon-history"> <path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path> </svg> </template> <template id="repo-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo"> <path d="M2 2.5A2.5 2.5 0 0 1 4.5 0h8.75a.75.75 0 0 1 .75.75v12.5a.75.75 0 0 1-.75.75h-2.5a.75.75 0 0 1 0-1.5h1.75v-2h-8a1 1 0 0 0-.714 1.7.75.75 0 1 1-1.072 1.05A2.495 2.495 0 0 1 2 11.5Zm10.5-1h-8a1 1 0 0 0-1 1v6.708A2.486 2.486 0 0 1 4.5 9h8ZM5 12.25a.25.25 0 0 1 .25-.25h3.5a.25.25 0 0 1 .25.25v3.25a.25.25 0 0 1-.4.2l-1.45-1.087a.249.249 0 0 0-.3 0L5.4 15.7a.25.25 0 0 1-.4-.2Z"></path> </svg> </template> <template id="bookmark-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bookmark"> <path d="M3 2.75C3 1.784 3.784 1 4.75 1h6.5c.966 0 1.75.784 1.75 1.75v11.5a.75.75 0 0 1-1.227.579L8 11.722l-3.773 3.107A.751.751 0 0 1 3 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v9.91l3.023-2.489a.75.75 0 0 1 .954 0l3.023 2.49V2.75a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="plus-circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-plus-circle"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm7.25-3.25v2.5h2.5a.75.75 0 0 1 0 1.5h-2.5v2.5a.75.75 0 0 1-1.5 0v-2.5h-2.5a.75.75 0 0 1 0-1.5h2.5v-2.5a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> </template> <template id="trash-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-trash"> <path d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"></path> </svg> </template> <template id="team-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-people"> <path d="M2 5.5a3.5 3.5 0 1 1 5.898 2.549 5.508 5.508 0 0 1 3.034 4.084.75.75 0 1 1-1.482.235 4 4 0 0 0-7.9 0 .75.75 0 0 1-1.482-.236A5.507 5.507 0 0 1 3.102 8.05 3.493 3.493 0 0 1 2 5.5ZM11 4a3.001 3.001 0 0 1 2.22 5.018 5.01 5.01 0 0 1 2.56 3.012.749.749 0 0 1-.885.954.752.752 0 0 1-.549-.514 3.507 3.507 0 0 0-2.522-2.372.75.75 0 0 1-.574-.73v-.352a.75.75 0 0 1 .416-.672A1.5 1.5 0 0 0 11 5.5.75.75 0 0 1 11 4Zm-5.5-.5a2 2 0 1 0-.001 3.999A2 2 0 0 0 5.5 3.5Z"></path> </svg> </template> <template id="project-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-project"> <path d="M1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 
1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0ZM1.5 1.75v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25ZM11.75 3a.75.75 0 0 1 .75.75v7.5a.75.75 0 0 1-1.5 0v-7.5a.75.75 0 0 1 .75-.75Zm-8.25.75a.75.75 0 0 1 1.5 0v5.5a.75.75 0 0 1-1.5 0ZM8 3a.75.75 0 0 1 .75.75v3.5a.75.75 0 0 1-1.5 0v-3.5A.75.75 0 0 1 8 3Z"></path> </svg> </template> <template id="pencil-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pencil"> <path d="M11.013 1.427a1.75 1.75 0 0 1 2.474 0l1.086 1.086a1.75 1.75 0 0 1 0 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 0 1-.927-.928l.929-3.25c.081-.286.235-.547.445-.758l8.61-8.61Zm.176 4.823L9.75 4.81l-6.286 6.287a.253.253 0 0 0-.064.108l-.558 1.953 1.953-.558a.253.253 0 0 0 .108-.064Zm1.238-3.763a.25.25 0 0 0-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 0 0 0-.354Z"></path> </svg> </template> <template id="copilot-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot"> <path d="M7.998 15.035c-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.201-.508-.254-1.084-.254-1.656 0-.87.128-1.769.693-2.484.579-.733 1.494-1.124 2.724-1.261 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095v1.872c0 .766-3.351 3.795-8.002 3.795Zm0-1.485c2.28 0 4.584-1.11 5.002-1.433V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-1.146 0-2.059-.327-2.71-.991A3.222 3.222 0 0 1 8 6.303a3.24 3.24 0 0 1-.544.743c-.65.664-1.563.991-2.71.991-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433ZM6.762 2.83c-.193-.206-.637-.413-1.682-.297-1.019.113-1.479.404-1.713.7-.247.312-.369.789-.369 1.554 0 .793.129 1.171.308 1.371.162.181.519.379 1.442.379.853 0 1.339-.235 1.638-.54.315-.322.527-.827.617-1.553.117-.935-.037-1.395-.241-1.614Zm4.155-.297c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Z"></path><path d="M6.25 9.037a.75.75 0 0 1 .75.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 .75-.75Zm4.25.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="copilot-error-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot-error"> <path d="M16 11.24c0 .112-.072.274-.21.467L13 9.688V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-.198 0-.388-.009-.571-.029L6.833 5.226a4.01 4.01 0 0 0 .17-.782c.117-.935-.037-1.395-.241-1.614-.193-.206-.637-.413-1.682-.297-.683.076-1.115.231-1.395.415l-1.257-.91c.579-.564 1.413-.877 2.485-.996 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095Zm-5.083-8.707c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 
1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Zm2.511 11.074c-1.393.776-3.272 1.428-5.43 1.428-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.18-.455-.241-.963-.252-1.475L.31 4.107A.747.747 0 0 1 0 3.509V3.49a.748.748 0 0 1 .625-.73c.156-.026.306.047.435.139l14.667 10.578a.592.592 0 0 1 .227.264.752.752 0 0 1 .046.249v.022a.75.75 0 0 1-1.19.596Zm-1.367-.991L5.635 7.964a5.128 5.128 0 0 1-.889.073c-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433 1.539 0 3.089-.505 4.063-.934Z"></path> </svg> </template> <template id="workflow-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-workflow"> <path d="M0 1.75C0 .784.784 0 1.75 0h3.5C6.216 0 7 .784 7 1.75v3.5A1.75 1.75 0 0 1 5.25 7H4v4a1 1 0 0 0 1 1h4v-1.25C9 9.784 9.784 9 10.75 9h3.5c.966 0 1.75.784 1.75 1.75v3.5A1.75 1.75 0 0 1 14.25 16h-3.5A1.75 1.75 0 0 1 9 14.25v-.75H5A2.5 2.5 0 0 1 2.5 11V7h-.75A1.75 1.75 0 0 1 0 5.25Zm1.75-.25a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Zm9 9a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="book-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </template> <template id="code-review-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-review"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 13H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 14.543V13H1.75A1.75 1.75 0 0 1 0 11.25v-8.5C0 1.784.784 1 1.75 1ZM1.5 2.75v8.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-8.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm5.28 1.72a.75.75 0 0 1 0 1.06L5.31 7l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.75.75 0 0 1 1.06 0Zm2.44 0a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L10.69 7 9.22 5.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="codespaces-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-codespaces"> <path d="M0 11.25c0-.966.784-1.75 1.75-1.75h12.5c.966 0 1.75.784 1.75 1.75v3A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm2-9.5C2 .784 2.784 0 3.75 0h8.5C13.216 0 14 .784 14 1.75v5a1.75 1.75 0 0 1-1.75 1.75h-8.5A1.75 1.75 0 0 1 2 6.75Zm1.75-.25a.25.25 0 0 0-.25.25v5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-5a.25.25 0 0 0-.25-.25Zm-2 9.5a.25.25 0 0 0-.25.25v3c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-3a.25.25 0 0 0-.25-.25Z"></path><path d="M7 12.75a.75.75 0 0 1 .75-.75h4.5a.75.75 0 0 1 0 
1.5h-4.5a.75.75 0 0 1-.75-.75Zm-4 0a.75.75 0 0 1 .75-.75h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1-.75-.75Z"></path> </svg> </template> <template id="comment-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment"> <path d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="comment-discussion-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> </template> <template id="organization-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-organization"> <path d="M1.75 16A1.75 1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0h8.5C11.216 0 12 .784 12 1.75v12.5c0 .085-.006.168-.018.25h2.268a.25.25 0 0 0 .25-.25V8.285a.25.25 0 0 0-.111-.208l-1.055-.703a.749.749 0 1 1 .832-1.248l1.055.703c.487.325.779.871.779 1.456v5.965A1.75 1.75 0 0 1 14.25 16h-3.5a.766.766 0 0 1-.197-.026c-.099.017-.2.026-.303.026h-3a.75.75 0 0 1-.75-.75V14h-1v1.25a.75.75 0 0 1-.75.75Zm-.25-1.75c0 .138.112.25.25.25H4v-1.25a.75.75 0 0 1 .75-.75h2.5a.75.75 0 0 1 .75.75v1.25h2.25a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM3.75 6h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 3.75A.75.75 0 0 1 3.75 3h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 3.75Zm4 3A.75.75 0 0 1 7.75 6h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 7 6.75ZM7.75 3h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 9.75A.75.75 0 0 1 3.75 9h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 9.75ZM7.75 9h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="rocket-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-rocket"> <path d="M14.064 0h.186C15.216 0 16 .784 16 1.75v.186a8.752 8.752 0 0 1-2.564 6.186l-.458.459c-.314.314-.641.616-.979.904v3.207c0 .608-.315 1.172-.833 1.49l-2.774 1.707a.749.749 0 0 1-1.11-.418l-.954-3.102a1.214 1.214 0 0 1-.145-.125L3.754 9.816a1.218 1.218 0 0 1-.124-.145L.528 8.717a.749.749 0 0 1-.418-1.11l1.71-2.774A1.748 1.748 0 0 1 3.31 4h3.204c.288-.338.59-.665.904-.979l.459-.458A8.749 8.749 0 0 1 14.064 0ZM8.938 3.623h-.002l-.458.458c-.76.76-1.437 1.598-2.02 2.5l-1.5 2.317 2.143 2.143 2.317-1.5c.902-.583 1.74-1.26 2.499-2.02l.459-.458a7.25 7.25 0 0 0 2.123-5.127V1.75a.25.25 0 0 0-.25-.25h-.186a7.249 7.249 0 0 0-5.125 2.123ZM3.56 14.56c-.732.732-2.334 1.045-3.005 1.148a.234.234 0 0 1-.201-.064.234.234 0 0 
1-.064-.201c.103-.671.416-2.273 1.15-3.003a1.502 1.502 0 1 1 2.12 2.12Zm6.94-3.935c-.088.06-.177.118-.266.175l-2.35 1.521.548 1.783 1.949-1.2a.25.25 0 0 0 .119-.213ZM3.678 8.116 5.2 5.766c.058-.09.117-.178.176-.266H3.309a.25.25 0 0 0-.213.119l-1.2 1.95ZM12 5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </template> <template id="shield-check-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield-check"> <path d="m8.533.133 5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667l5.25-1.68a1.748 1.748 0 0 1 1.066 0Zm-.61 1.429.001.001-5.25 1.68a.251.251 0 0 0-.174.237V7c0 1.36.275 2.666 1.057 3.859.784 1.194 2.121 2.342 4.366 3.298a.196.196 0 0 0 .154 0c2.245-.957 3.582-2.103 4.366-3.297C13.225 9.666 13.5 8.358 13.5 7V3.48a.25.25 0 0 0-.174-.238l-5.25-1.68a.25.25 0 0 0-.153 0ZM11.28 6.28l-3.5 3.5a.75.75 0 0 1-1.06 0l-1.5-1.5a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l.97.97 2.97-2.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="heart-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-heart"> <path d="m8 14.25.345.666a.75.75 0 0 1-.69 0l-.008-.004-.018-.01a7.152 7.152 0 0 1-.31-.17 22.055 22.055 0 0 1-3.434-2.414C2.045 10.731 0 8.35 0 5.5 0 2.836 2.086 1 4.25 1 5.797 1 7.153 1.802 8 3.02 8.847 1.802 10.203 1 11.75 1 13.914 1 16 2.836 16 5.5c0 2.85-2.045 5.231-3.885 6.818a22.066 22.066 0 0 1-3.744 2.584l-.018.01-.006.003h-.002ZM4.25 2.5c-1.336 0-2.75 1.164-2.75 3 0 2.15 1.58 4.144 3.365 5.682A20.58 20.58 0 0 0 8 13.393a20.58 20.58 0 0 0 3.135-2.211C12.92 9.644 14.5 7.65 14.5 5.5c0-1.836-1.414-3-2.75-3-1.373 0-2.609.986-3.029 2.456a.749.749 0 0 1-1.442 0C6.859 3.486 5.623 2.5 4.25 2.5Z"></path> </svg> </template> <template id="server-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-server"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v4c0 .372-.116.717-.314 1 .198.283.314.628.314 1v4a1.75 1.75 0 0 1-1.75 1.75H1.75A1.75 1.75 0 0 1 0 12.75v-4c0-.358.109-.707.314-1a1.739 1.739 0 0 1-.314-1v-4C0 1.784.784 1 1.75 1ZM1.5 2.75v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm.25 5.75a.25.25 0 0 0-.25.25v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25ZM7 4.75A.75.75 0 0 1 7.75 4h4.5a.75.75 0 0 1 0 1.5h-4.5A.75.75 0 0 1 7 4.75ZM7.75 10h4.5a.75.75 0 0 1 0 1.5h-4.5a.75.75 0 0 1 0-1.5ZM3 4.75A.75.75 0 0 1 3.75 4h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 4.75ZM3.75 10h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="globe-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-globe"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM5.78 8.75a9.64 9.64 0 0 0 1.363 4.177c.255.426.542.832.857 1.215.245-.296.551-.705.857-1.215A9.64 9.64 0 0 0 10.22 8.75Zm4.44-1.5a9.64 9.64 0 0 0-1.363-4.177c-.307-.51-.612-.919-.857-1.215a9.927 9.927 0 0 0-.857 1.215A9.64 9.64 0 0 0 5.78 7.25Zm-5.944 1.5H1.543a6.507 6.507 0 0 0 4.666 5.5c-.123-.181-.24-.365-.352-.552-.715-1.192-1.437-2.874-1.581-4.948Zm-2.733-1.5h2.733c.144-2.074.866-3.756 
1.58-4.948.12-.197.237-.381.353-.552a6.507 6.507 0 0 0-4.666 5.5Zm10.181 1.5c-.144 2.074-.866 3.756-1.58 4.948-.12.197-.237.381-.353.552a6.507 6.507 0 0 0 4.666-5.5Zm2.733-1.5a6.507 6.507 0 0 0-4.666-5.5c.123.181.24.365.353.552.714 1.192 1.436 2.874 1.58 4.948Z"></path> </svg> </template> <template id="issue-opened-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </template> <template id="device-mobile-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-mobile"> <path d="M3.75 0h8.5C13.216 0 14 .784 14 1.75v12.5A1.75 1.75 0 0 1 12.25 16h-8.5A1.75 1.75 0 0 1 2 14.25V1.75C2 .784 2.784 0 3.75 0ZM3.5 1.75v12.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM8 13a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path> </svg> </template> <template id="package-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-package"> <path d="m8.878.392 5.25 3.045c.54.314.872.89.872 1.514v6.098a1.75 1.75 0 0 1-.872 1.514l-5.25 3.045a1.75 1.75 0 0 1-1.756 0l-5.25-3.045A1.75 1.75 0 0 1 1 11.049V4.951c0-.624.332-1.201.872-1.514L7.122.392a1.75 1.75 0 0 1 1.756 0ZM7.875 1.69l-4.63 2.685L8 7.133l4.755-2.758-4.63-2.685a.248.248 0 0 0-.25 0ZM2.5 5.677v5.372c0 .09.047.171.125.216l4.625 2.683V8.432Zm6.25 8.271 4.625-2.683a.25.25 0 0 0 .125-.216V5.677L8.75 8.432Z"></path> </svg> </template> <template id="credit-card-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-credit-card"> <path d="M10.75 9a.75.75 0 0 0 0 1.5h1.5a.75.75 0 0 0 0-1.5h-1.5Z"></path><path d="M0 3.75C0 2.784.784 2 1.75 2h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 14H1.75A1.75 1.75 0 0 1 0 12.25ZM14.5 6.5h-13v5.75c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25Zm0-2.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25V5h13Z"></path> </svg> </template> <template id="play-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </template> <template id="gift-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-gift"> <path d="M2 2.75A2.75 2.75 0 0 1 4.75 0c.983 0 1.873.42 2.57 1.232.268.318.497.668.68 1.042.183-.375.411-.725.68-1.044C9.376.42 10.266 0 11.25 0a2.75 2.75 0 0 1 2.45 4h.55c.966 0 1.75.784 1.75 1.75v2c0 .698-.409 1.301-1 1.582v4.918A1.75 1.75 0 0 1 13.25 16H2.75A1.75 1.75 0 0 1 1 14.25V9.332C.409 9.05 0 8.448 0 7.75v-2C0 4.784.784 4 1.75 4h.55c-.192-.375-.3-.8-.3-1.25ZM7.25 9.5H2.5v4.75c0 .138.112.25.25.25h4.5Zm1.5 0v5h4.5a.25.25 0 0 0 .25-.25V9.5Zm0-4V8h5.5a.25.25 0 0 0 .25-.25v-2a.25.25 0 0 0-.25-.25Zm-7 0a.25.25 0 0 0-.25.25v2c0 .138.112.25.25.25h5.5V5.5h-5.5Zm3-4a1.25 1.25 0 0 0 0 2.5h2.309c-.233-.818-.542-1.401-.878-1.793-.43-.502-.915-.707-1.431-.707ZM8.941 4h2.309a1.25 1.25 0 0 0 0-2.5c-.516 
0-1 .205-1.43.707-.337.392-.646.975-.879 1.793Z"></path> </svg> </template> <template id="code-square-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-square"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25Zm7.47 3.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L10.69 8 9.22 6.53a.75.75 0 0 1 0-1.06ZM6.78 6.53 5.31 8l1.47 1.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="device-desktop-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-desktop"> <path d="M14.25 1c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 14.25 12h-3.727c.099 1.041.52 1.872 1.292 2.757A.752.752 0 0 1 11.25 16h-6.5a.75.75 0 0 1-.565-1.243c.772-.885 1.192-1.716 1.292-2.757H1.75A1.75 1.75 0 0 1 0 10.25v-7.5C0 1.784.784 1 1.75 1ZM1.75 2.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25ZM9.018 12H6.982a5.72 5.72 0 0 1-.765 2.5h3.566a5.72 5.72 0 0 1-.765-2.5Z"></path> </svg> </template> <div class="position-relative"> <ul role="listbox" class="ActionListWrap QueryBuilder-ListWrap" aria-label="Suggestions" data-action=" combobox-commit:query-builder#comboboxCommit mousedown:query-builder#resultsMousedown " data-target="query-builder.resultsList" data-persist-list=false id="query-builder-test-results" ></ul> </div> <div class="FormControl-inlineValidation" id="validation-f389797f-19c6-4424-b886-a86542fec86b" hidden="hidden"> <span class="FormControl-inlineValidation--visual"> <svg aria-hidden="true" height="12" viewBox="0 0 12 12" version="1.1" width="12" data-view-component="true" class="octicon octicon-alert-fill"> <path d="M4.855.708c.5-.896 1.79-.896 2.29 0l4.675 8.351a1.312 1.312 0 0 1-1.146 1.954H1.33A1.313 1.313 0 0 1 .183 9.058ZM7 7V3H5v4Zm-1 3a1 1 0 1 0 0-2 1 1 0 0 0 0 2Z"></path> </svg> </span> <span></span> </div> </div> <div data-target="query-builder.screenReaderFeedback" aria-live="polite" aria-atomic="true" class="sr-only"></div> </query-builder></form> <div class="d-flex flex-row color-fg-muted px-3 text-small color-bg-default search-feedback-prompt"> <a target="_blank" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax" data-view-component="true" class="Link color-fg-accent text-normal ml-2">Search syntax tips</a> <div class="d-flex flex-1"></div> </div> </div> </div> </div> </modal-dialog></div> </div> <div data-action="click:qbsearch-input#retract" class="dark-backdrop position-fixed" hidden data-target="qbsearch-input.darkBackdrop"></div> <div class="color-fg-default"> <dialog-helper> <dialog data-target="qbsearch-input.feedbackDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="feedback-dialog" aria-modal="true" aria-labelledby="feedback-dialog-title" aria-describedby="feedback-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header"> <div 
class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="feedback-dialog-title"> Provide feedback </h1> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="feedback-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="feedback-dialog-title"> <div data-view-component="true" class="Overlay-body"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="code-search-feedback-form" data-turbo="false" action="/search/feedback" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="PEmf9ha2TG2Gv0wVqrW6ew/k3fKHg/3EXhyWY7iajPFiG10us0loyGLC5MHrjmav2qSBsnMMza73AbtisMnf/w==" /> <p>We read every piece of feedback, and take your input very seriously.</p> <textarea name="feedback" class="form-control width-full mb-2" style="height: 120px" id="feedback"></textarea> <input name="include_email" id="include_email" aria-label="Include my email address so I can be contacted" class="form-control mr-2" type="checkbox"> <label for="include_email" style="font-weight: normal">Include my email address so I can be contacted</label> </form></div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd"> <button data-close-dialog-id="feedback-dialog" type="button" data-view-component="true" class="btn"> Cancel </button> <button form="code-search-feedback-form" data-action="click:qbsearch-input#submitFeedback" type="submit" data-view-component="true" class="btn-primary btn"> Submit feedback </button> </div> </dialog></dialog-helper> <custom-scopes data-target="qbsearch-input.customScopesManager"> <dialog-helper> <dialog data-target="custom-scopes.customScopesModalDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="custom-scopes-dialog" aria-modal="true" aria-labelledby="custom-scopes-dialog-title" aria-describedby="custom-scopes-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header Overlay-header--divided"> <div class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="custom-scopes-dialog-title"> Saved searches </h1> <h2 id="custom-scopes-dialog-description" class="Overlay-description">Use saved searches to filter your results more quickly</h2> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="custom-scopes-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="custom-scopes-dialog-title"> <div data-view-component="true" class="Overlay-body"> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <div hidden class="create-custom-scope-form" data-target="custom-scopes.createCustomScopeForm"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="custom-scopes-dialog-form" data-turbo="false" action="/search/custom_scopes" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="dwXiyRvAHu0mjZuKEwcUKTjo1VfGaAaGmB8CHOU1kjlEWgEPUSmkOQbg6JBDcqHdvOkHwPfHfwxK9ohxCi5L6Q==" /> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <input type="hidden" id="custom_scope_id" name="custom_scope_id" data-target="custom-scopes.customScopesIdField"> <div class="form-group"> <label for="custom_scope_name">Name</label> <auto-check src="/search/custom_scopes/check_name" required only-validate-on-blur="false"> <input type="text" name="custom_scope_name" id="custom_scope_name" data-target="custom-scopes.customScopesNameField" class="form-control" autocomplete="off" placeholder="github-ruby" required maxlength="50"> <input type="hidden" data-csrf="true" value="mmZE5sAsib4/rpoPthVVAyRy9rL8XYxPvb7P2Sdsu+rVXx1Nu+byVU5OCwrf8HM8Ce8ajTPFPqnSxYa7Bz5kFA==" /> </auto-check> </div> <div class="form-group"> <label for="custom_scope_query">Query</label> <input type="text" name="custom_scope_query" id="custom_scope_query" data-target="custom-scopes.customScopesQueryField" class="form-control" autocomplete="off" placeholder="(repo:mona/a OR repo:mona/b) AND lang:python" required maxlength="500"> </div> <p class="text-small color-fg-muted"> To see all available qualifiers, see our <a class="Link--inTextBlock" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax">documentation</a>. 
</p> </form> </div> <div data-target="custom-scopes.manageCustomScopesForm"> <div data-target="custom-scopes.list"></div> </div> </div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd Overlay-footer--divided"> <button data-action="click:custom-scopes#customScopesCancel" type="button" data-view-component="true" class="btn"> Cancel </button> <button form="custom-scopes-dialog-form" data-action="click:custom-scopes#customScopesSubmit" data-target="custom-scopes.customScopesSubmitButton" type="submit" data-view-component="true" class="btn-primary btn"> Create saved search </button> </div> </dialog></dialog-helper> </custom-scopes> </div> </qbsearch-input> <div class="position-relative HeaderMenu-link-wrap d-lg-inline-block"> <a href="/login?return_to=https%3A%2F%2Fgithub.com%2FEleutherAI%2Fgpt-neox" class="HeaderMenu-link HeaderMenu-link--sign-in HeaderMenu-button flex-shrink-0 no-underline d-none d-lg-inline-flex border border-lg-0 rounded rounded-lg-0 px-2 py-1" style="margin-left: 12px;" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/EleutherAI/gpt-neox&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="afbdec1f18e0e13751cb576bdd5462cc2e92b3082e6a415bb0b214c35051b005" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to go to homepage&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Sign in;ref_loc:Header&quot;}" > Sign in </a> </div> <a href="/signup?ref_cta=Sign+up&amp;ref_loc=header+logged+out&amp;ref_page=%2F%3Cuser-name%3E%2F%3Crepo-name%3E&amp;source=header-repo&amp;source_repo=EleutherAI%2Fgpt-neox" class="HeaderMenu-link HeaderMenu-link--sign-up HeaderMenu-button flex-shrink-0 d-flex d-lg-inline-flex no-underline border color-border-default rounded px-2 py-1" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/EleutherAI/gpt-neox&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="afbdec1f18e0e13751cb576bdd5462cc2e92b3082e6a415bb0b214c35051b005" data-analytics-event="{&quot;category&quot;:&quot;Sign up&quot;,&quot;action&quot;:&quot;click to sign up for account&quot;,&quot;label&quot;:&quot;ref_page:/&lt;user-name&gt;/&lt;repo-name&gt;;ref_cta:Sign up;ref_loc:header logged out&quot;}" > Sign up </a> <button type="button" class="sr-only js-header-menu-focus-trap d-block d-lg-none">Reseting focus</button> </div> </div> </div> </div> </header> <div hidden="hidden" data-view-component="true" class="js-stale-session-flash stale-session-flash flash flash-warn flash-full"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-alert"> <path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575Zm1.763.707a.25.25 0 0 0-.44 0L1.698 13.132a.25.25 0 0 0 .22.368h12.164a.25.25 0 0 0 .22-.368Zm.53 3.996v2.5a.75.75 0 0 1-1.5 0v-2.5a.75.75 0 0 1 1.5 0ZM9 11a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span class="js-stale-session-flash-signed-in" hidden>You signed in with another tab or 
EleutherAI / gpt-neox · Public

An implementation of model parallel autoregressive transformers on GPUs, based on the Megatron and DeepSpeed libraries

Website: https://www.eleuther.ai/
Stars: 7,142 · Forks: 1,045
License: /EleutherAI/gpt-neox/blob/main/LICENSE
<svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-law mr-2"> <path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path> </svg> Apache-2.0 license </a> </div> <div class="mb-3"> <a class="Link--secondary no-underline mr-3" href="/EleutherAI/gpt-neox/stargazers"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star mr-1"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg> <span class="text-bold">7.1k</span> stars </a> <a class="Link--secondary no-underline mr-3" href="/EleutherAI/gpt-neox/forks"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-1"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg> <span class="text-bold">1k</span> forks </a> <a class="Link--secondary no-underline mr-3 d-inline-block" href="/EleutherAI/gpt-neox/branches"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-branch mr-1"> <path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path> </svg> <span>Branches</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/EleutherAI/gpt-neox/tags"> <svg aria-hidden="true" height="16" 
viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-tag mr-1"> <path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path> </svg> <span>Tags</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/EleutherAI/gpt-neox/activity"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pulse mr-1"> <path d="M6 2c.306 0 .582.187.696.471L10 10.731l1.304-3.26A.751.751 0 0 1 12 7h3.25a.75.75 0 0 1 0 1.5h-2.742l-1.812 4.528a.751.751 0 0 1-1.392 0L6 4.77 4.696 8.03A.75.75 0 0 1 4 8.5H.75a.75.75 0 0 1 0-1.5h2.742l1.812-4.529A.751.751 0 0 1 6 2Z"></path> </svg> <span>Activity</span> </a> </div> <div class="d-flex flex-wrap gap-2"> <div class="flex-1"> <div data-view-component="true" class="BtnGroup d-flex"> <a href="/login?return_to=%2FEleutherAI%2Fgpt-neox" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;star button&quot;,&quot;repository_id&quot;:323651234,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/EleutherAI/gpt-neox&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="e082eda8662ad38fb361a883d38a5bcf26c2a761e641fb89d7f897e27aeec796" aria-label="You must be signed in to star a repository" data-view-component="true" class="tooltipped tooltipped-sw btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star v-align-text-bottom d-inline-block mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg><span data-view-component="true" class="d-inline"> Star </span> </a></div> </div> <div class="flex-1"> <a href="/login?return_to=%2FEleutherAI%2Fgpt-neox" rel="nofollow" id="files-overview-watch-button" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;notification subscription menu watch&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/EleutherAI/gpt-neox&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="b43d413e2c4a7be485554206f6181c61fcc827783aafd663ae904d155cc43093" aria-label="You must be signed in to change notification settings" data-view-component="true" class="btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bell mr-2"> <path d="M8 16a2 2 0 0 0 1.985-1.75c.017-.137-.097-.25-.235-.25h-3.5c-.138 0-.252.113-.235.25A2 2 0 0 0 8 16ZM3 5a5 5 0 0 1 10 0v2.947c0 .05.015.098.042.139l1.703 2.555A1.519 1.519 0 
0 1 13.482 13H2.518a1.516 1.516 0 0 1-1.263-2.36l1.703-2.554A.255.255 0 0 0 3 7.947Zm5-3.5A3.5 3.5 0 0 0 4.5 5v2.947c0 .346-.102.683-.294.97l-1.703 2.556a.017.017 0 0 0-.003.01l.001.006c0 .002.002.004.004.006l.006.004.007.001h10.964l.007-.001.006-.004.004-.006.001-.007a.017.017 0 0 0-.003-.01l-1.703-2.554a1.745 1.745 0 0 1-.294-.97V5A3.5 3.5 0 0 0 8 1.5Z"></path> </svg>Notifications </a> <tool-tip id="tooltip-6f9d1306-b69d-4302-8524-d49b70c22922" for="files-overview-watch-button" popover="manual" data-direction="s" data-type="description" data-view-component="true" class="sr-only position-absolute">You must be signed in to change notification settings</tool-tip> </div> <span> </span> </div> </div> </div> <nav data-pjax="#js-repo-pjax-container" aria-label="Repository" data-view-component="true" class="js-repo-nav js-sidenav-container-pjax js-responsive-underlinenav overflow-hidden UnderlineNav px-3 px-md-4 px-lg-5"> <ul data-view-component="true" class="UnderlineNav-body list-style-none"> <li data-view-component="true" class="d-inline-flex"> <a id="code-tab" href="/EleutherAI/gpt-neox" data-tab-item="i0code-tab" data-selected-links="repo_source repo_downloads repo_commits repo_releases repo_tags repo_branches repo_packages repo_deployments repo_attestations /EleutherAI/gpt-neox" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g c" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Code&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" aria-current="page" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item selected"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code UnderlineNav-octicon d-none d-sm-inline"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> <span data-content="Code">Code</span> <span id="code-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="issues-tab" href="/EleutherAI/gpt-neox/issues" data-tab-item="i1issues-tab" data-selected-links="repo_issues repo_labels repo_milestones /EleutherAI/gpt-neox/issues" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g i" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Issues&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> <span data-content="Issues">Issues</span> <span 
id="issues-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="64" data-view-component="true" class="Counter">64</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="pull-requests-tab" href="/EleutherAI/gpt-neox/pulls" data-tab-item="i2pull-requests-tab" data-selected-links="repo_pulls checks /EleutherAI/gpt-neox/pulls" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g p" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Pull requests&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> <span data-content="Pull requests">Pull requests</span> <span id="pull-requests-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="24" data-view-component="true" class="Counter">24</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="actions-tab" href="/EleutherAI/gpt-neox/actions" data-tab-item="i3actions-tab" data-selected-links="repo_actions /EleutherAI/gpt-neox/actions" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g a" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Actions&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> <span data-content="Actions">Actions</span> <span id="actions-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="projects-tab" href="/EleutherAI/gpt-neox/projects" data-tab-item="i4projects-tab" data-selected-links="repo_projects new_repo_project repo_project /EleutherAI/gpt-neox/projects" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g b" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Projects&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg 
aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> <span data-content="Projects">Projects</span> <span id="projects-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="0" hidden="hidden" data-view-component="true" class="Counter">0</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="wiki-tab" href="/EleutherAI/gpt-neox/wiki" data-tab-item="i5wiki-tab" data-selected-links="repo_wiki /EleutherAI/gpt-neox/wiki" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g w" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Wiki&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> <span data-content="Wiki">Wiki</span> <span id="wiki-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="security-tab" href="/EleutherAI/gpt-neox/security" data-tab-item="i6security-tab" data-selected-links="security overview alerts policy token_scanning code_scanning /EleutherAI/gpt-neox/security" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g s" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Security&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield UnderlineNav-octicon d-none d-sm-inline"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 
4.75v3a.75.75 0 0 1-1.5 0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span data-content="Security">Security</span> <include-fragment src="/EleutherAI/gpt-neox/security/overall-count" accept="text/fragment+html"></include-fragment> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="insights-tab" href="/EleutherAI/gpt-neox/pulse" data-tab-item="i7insights-tab" data-selected-links="repo_graphs repo_contributors dependency_graph dependabot_updates pulse people community /EleutherAI/gpt-neox/pulse" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Insights&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> <span data-content="Insights">Insights</span> <span id="insights-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> </ul> <div style="visibility:hidden;" data-view-component="true" class="UnderlineNav-actions js-responsive-underlinenav-overflow position-absolute pr-3 pr-md-4 pr-lg-5 right-0"> <action-menu data-select-variant="none" data-view-component="true"> <focus-group direction="vertical" mnemonics retain> <button id="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-button" popovertarget="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-overlay" aria-controls="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-list" aria-haspopup="true" aria-labelledby="tooltip-1e6c970c-fe32-47ec-a716-f41ce44ebc79" type="button" data-view-component="true" class="Button Button--iconOnly Button--secondary Button--medium UnderlineNav-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-kebab-horizontal Button-visual"> <path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path> </svg> </button><tool-tip id="tooltip-1e6c970c-fe32-47ec-a716-f41ce44ebc79" for="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-button" popover="manual" data-direction="s" data-type="label" data-view-component="true" class="sr-only position-absolute">Additional navigation options</tool-tip> <anchored-position data-target="action-menu.overlay" id="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-overlay" anchor="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-button" align="start" side="outside-bottom" anchor-offset="normal" popover="auto" data-view-component="true"> <div data-view-component="true" class="Overlay Overlay--size-auto"> <div data-view-component="true" class="Overlay-body Overlay-body--paddingNone"> <action-list> <div data-view-component="true"> <ul aria-labelledby="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-button" 
id="action-menu-4c4b488f-5c02-4a93-98c8-ae476e2d229b-list" role="menu" data-view-component="true" class="ActionListWrap--inset ActionListWrap"> <li hidden="hidden" data-menu-item="i0code-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-c1b9fc95-2f34-422b-b776-a8e9b8e79327" href="/EleutherAI/gpt-neox" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Code </span> </a> </li> <li hidden="hidden" data-menu-item="i1issues-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-33299b99-3c05-4bc1-ba04-96fa7e3648b6" href="/EleutherAI/gpt-neox/issues" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Issues </span> </a> </li> <li hidden="hidden" data-menu-item="i2pull-requests-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-b227bb6a-5158-4580-9d87-35bd0b393598" href="/EleutherAI/gpt-neox/pulls" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Pull requests </span> </a> </li> <li hidden="hidden" data-menu-item="i3actions-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-c837a2bb-0f1e-4644-85df-a5828c987a03" href="/EleutherAI/gpt-neox/actions" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" 
data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Actions </span> </a> </li> <li hidden="hidden" data-menu-item="i4projects-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-69e57378-e0e9-487f-85e3-5ecff2c5ae2c" href="/EleutherAI/gpt-neox/projects" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Projects </span> </a> </li> <li hidden="hidden" data-menu-item="i5wiki-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-4abd49c0-3008-46c8-a42f-b8ea464005ec" href="/EleutherAI/gpt-neox/wiki" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Wiki </span> </a> </li> <li hidden="hidden" data-menu-item="i6security-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-ac9e24af-855e-4698-a400-5630925a9b26" href="/EleutherAI/gpt-neox/security" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 4.75v3a.75.75 0 0 1-1.5 
0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Security </span> </a> </li> <li hidden="hidden" data-menu-item="i7insights-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-8fb65b69-0c3d-43e0-9487-3b3eb5506a49" href="/EleutherAI/gpt-neox/pulse" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Insights </span> </a> </li> </ul> </div></action-list> </div> </div></anchored-position> </focus-group> </action-menu></div> </nav> </div> <turbo-frame id="repo-content-turbo-frame" target="_top" data-turbo-action="advance" class=""> <div id="repo-content-pjax-container" class="repository-content " > <h1 class='sr-only'>EleutherAI/gpt-neox</h1> <div class="clearfix container-xl px-md-4 px-lg-5 px-3"> <div> <div style="max-width: 100%" data-view-component="true" class="Layout Layout--flowRow-until-md react-repos-overview-margin Layout--sidebarPosition-end Layout--sidebarPosition-flowRow-end"> <div data-view-component="true" class="Layout-main"> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_dompurify_dist_purify_es_mjs-dd1d3ea6a436.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_tanstack_query-core_build_modern_queryObserver_js-node_modules_tanstack_-defd52-843b41414e0e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_aria-live_aria-live_ts-ui_packages_promise-with-resolvers-polyfill_promise-with-r-17c672-34345cb18aac.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_paths_index_ts-e019c54eb886.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_ref-selector_RefSelector_tsx-7496afc3784d.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_commit-attribution_index_ts-ui_packages_commit-checks-status_index_ts-ui_packages-7094d4-15017f02e61c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_code-view-shared_hooks_use-canonical-object_ts-ui_packages_code-view-shared_hooks-5f1d09-1ee828c2d6e8.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/repos-overview-d245eae13daf.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" 
href="https://github.githubassets.com/assets/primer-react.9df1783473f10f02fb62.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/repos-overview.0ee7cac3ab511a65d9f9.module.css" /> <react-partial partial-name="repos-overview" data-ssr="true" data-attempted-ssr="true" > <script type="application/json" data-target="react-partial.embeddedData">{"props":{"initialPayload":{"allShortcutsEnabled":false,"path":"/","repo":{"id":323651234,"defaultBranch":"main","name":"gpt-neox","ownerLogin":"EleutherAI","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2020-12-22T14:37:54.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/68924597?v=4","public":true,"private":false,"isOrgOwned":true},"currentUser":null,"refInfo":{"name":"main","listCacheKey":"v0:1742676635.0","canEdit":false,"refType":"branch","currentOid":"f84b54e20361772cf97da87ba48960b741954065"},"tree":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"configs","path":"configs","contentType":"directory"},{"name":"eval_tasks","path":"eval_tasks","contentType":"directory"},{"name":"images","path":"images","contentType":"directory"},{"name":"megatron","path":"megatron","contentType":"directory"},{"name":"post-training","path":"post-training","contentType":"directory"},{"name":"requirements","path":"requirements","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"tools","path":"tools","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".dockerignore","path":".dockerignore","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".pre-commit-config.yaml","path":".pre-commit-config.yaml","contentType":"file"},{"name":"CITATION.cff","path":"CITATION.cff","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"Dockerfile","path":"Dockerfile","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"MANIFEST.in","path":"MANIFEST.in","contentType":"file"},{"name":"README-MUP.md","path":"README-MUP.md","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"deepy.py","path":"deepy.py","contentType":"file"},{"name":"docker-compose-dockerhub.yml","path":"docker-compose-dockerhub.yml","contentType":"file"},{"name":"docker-compose.yml","path":"docker-compose.yml","contentType":"file"},{"name":"eval.py","path":"eval.py","contentType":"file"},{"name":"generate.py","path":"generate.py","contentType":"file"},{"name":"prepare_data.py","path":"prepare_data.py","contentType":"file"},{"name":"train.py","path":"train.py","contentType":"file"}],"templateDirectorySuggestionUrl":null,"readme":null,"totalCount":27,"showBranchInfobar":false},"fileTree":null,"fileTreeProcessingTime":null,"foldersToFetch":[],"treeExpanded":false,"symbolsExpanded":false,"isOverview":true,"overview":{"banners":{"shouldRecommendReadme":false,"isPersonalRepo":false,"showUseActionBanner":false,"actionSlug":null,"actionId":null,"showProtectBranchBanner":false,"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_repo","releasePath":"/EleutherAI/gpt-neox/releases/new?marketplace=true","showPublishActionBanner":false},"interactionLimitBanner":null,"showInvitationBanner":false,"inviterName":null,"actionsMigrationBannerInfo":{"releaseTags":[],"showImmutableActionsMigrationBanner":false,"initial
[![GitHub issues](https://img.shields.io/github/issues/EleutherAI/gpt-neox)](https://github.com/EleutherAI/gpt-neox/issues)
[![Weights & Biases monitoring](https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg)](https://wandb.ai/eleutherai/neox)

# GPT-NeoX

This repository records [EleutherAI](https://www.eleuther.ai)'s library for training large-scale language models on GPUs. Our current framework is based on NVIDIA's [Megatron Language Model](https://github.com/NVIDIA/Megatron-LM) and has been augmented with techniques from [DeepSpeed](https://www.deepspeed.ai) as well as some novel optimizations. We aim to make this repo a centralized and accessible place to gather techniques for training large-scale autoregressive language models, and to accelerate research into large-scale training. This library is in widespread use in [academic, industry, and government labs](https://github.com/EleutherAI/gpt-neox#adoption-and-publications), including by researchers at Oak Ridge National Lab, CarperAI, Stability AI, Together.ai, Korea University, Carnegie Mellon University, and the University of Tokyo, among others. Uniquely among similar libraries, GPT-NeoX supports a wide variety of systems and hardware, including launching via Slurm, MPI, and the IBM Job Step Manager, and has been run at scale on [AWS](https://aws.amazon.com/), [CoreWeave](https://www.coreweave.com/), [ORNL Summit](https://www.olcf.ornl.gov/summit/), [ORNL Frontier](https://www.olcf.ornl.gov/frontier/), [LUMI](https://www.lumi-supercomputer.eu/), and others.

**If you are not looking to train models with billions of parameters from scratch, this is likely the wrong library to use. For generic inference needs, we recommend the Hugging Face `transformers` library instead, which supports GPT-NeoX models.**
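To make that recommendation concrete, here is a minimal inference sketch using `transformers` (this is not code from this repository; the checkpoint name and generation settings are only illustrative, and the 20B weights need tens of GB of memory, so a smaller GPT-NeoX-architecture checkpoint may be substituted):

```python
# Minimal inference sketch (not part of this repository): running a GPT-NeoX
# checkpoint through Hugging Face transformers. The checkpoint name and
# generation settings are illustrative; substitute a smaller GPT-NeoX-style
# model if the 20B weights do not fit in memory.
from transformers import AutoTokenizer, GPTNeoXForCausalLM

model_name = "EleutherAI/gpt-neox-20b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GPTNeoXForCausalLM.from_pretrained(model_name)

inputs = tokenizer("GPT-NeoX is a library for", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```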
## Why GPT-NeoX?

GPT-NeoX leverages many of the same features and technologies as the popular Megatron-DeepSpeed library, but with substantially increased usability and novel optimizations. Major features include:

* Distributed training with ZeRO and 3D parallelism (see the configuration sketch after this list)
* Support for a wide variety of systems and hardware, including launching via Slurm, MPI, and the IBM Job Step Manager, with runs at scale on [AWS](https://aws.amazon.com/), [CoreWeave](https://www.coreweave.com/), Oak Ridge's [Summit](https://www.olcf.ornl.gov/summit/) and [Frontier](https://www.olcf.ornl.gov/frontier/), [Pacific Northwest National Laboratory](https://hpc.pnl.gov/index.shtml), Argonne's [Polaris](https://docs.alcf.anl.gov/polaris/data-science-workflows/applications/gpt-neox/), [LUMI](https://www.lumi-supercomputer.eu/), and more
* Cutting-edge architectural innovations, including rotary and ALiBi positional embeddings, parallel feedforward attention layers, and flash attention
* Predefined configurations for popular architectures, including Pythia, PaLM, Falcon, and LLaMA 1 & 2
* Curriculum learning
* Easy connections with the open-source ecosystem, including Hugging Face's [tokenizers](https://github.com/huggingface/tokenizers) and [transformers](https://github.com/huggingface/transformers/) libraries, experiment monitoring via [WandB](https://wandb.ai/site)/[Comet](https://www.comet.com/site/)/TensorBoard, and evaluation via our [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
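For the first bullet, the sketch below shows roughly how 3D parallelism and ZeRO are expressed. Real runs are configured through YAML files under `configs/`; this Python dict is only an illustration, and the key names mirror the repository's example configs but should be checked against the version you are using.

```python
# Illustration only: GPT-NeoX runs are configured through YAML files under
# configs/ (e.g. configs/125M.yml); this dict just sketches how 3D parallelism
# and ZeRO typically appear in such a config. Key names mirror the example
# configs but should be verified for your GPT-NeoX version.
parallelism_sketch = {
    "pipe-parallel-size": 2,   # number of pipeline-parallel stages
    "model-parallel-size": 4,  # tensor (model) parallel degree
    # Data parallelism covers the remaining GPUs automatically.
    "zero_optimization": {
        "stage": 1,            # ZeRO stage for sharding optimizer state
    },
}
```

Under such a split, the data-parallel degree is whatever remains after dividing the total GPU count by the pipeline and tensor degrees; with the values above, 64 GPUs would give 64 / (2 × 4) = 8-way data parallelism.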
href=\"https://arxiv.org/abs/2402.01306\" rel=\"nofollow\"\u003eKTO\u003c/a\u003e, and reward modeling\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[9/9/2024]\u003c/strong\u003e We now support integration with \u003ca href=\"https://www.comet.com/site/\" rel=\"nofollow\"\u003eComet ML\u003c/a\u003e, a machine learning monitoring platform\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[5/21/2024]\u003c/strong\u003e We now support \u003ca href=\"https://www.rwkv.com/\" rel=\"nofollow\"\u003eRWKV\u003c/a\u003e with pipeline parallelism!. See the PRs for \u003ca href=\"https://github.com/EleutherAI/gpt-neox/pull/1198\" data-hovercard-type=\"pull_request\" data-hovercard-url=\"/EleutherAI/gpt-neox/pull/1198/hovercard\"\u003eRWKV\u003c/a\u003e and \u003ca href=\"https://github.com/EleutherAI/gpt-neox/pull/1221\" data-hovercard-type=\"pull_request\" data-hovercard-url=\"/EleutherAI/gpt-neox/pull/1221/hovercard\"\u003eRWKV+pipeline\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[3/21/2024]\u003c/strong\u003e We now support Mixture-of-Experts (MoE)\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[3/17/2024]\u003c/strong\u003e We now support AMD MI250X GPUs\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[3/15/2024]\u003c/strong\u003e We now support \u003ca href=\"https://github.com/state-spaces/mamba\"\u003eMamba\u003c/a\u003e with tensor parallelism! See \u003ca href=\"https://github.com/EleutherAI/gpt-neox/pull/1184\" data-hovercard-type=\"pull_request\" data-hovercard-url=\"/EleutherAI/gpt-neox/pull/1184/hovercard\"\u003ethe PR\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[8/10/2023]\u003c/strong\u003e We now support checkpointing with AWS S3! Activate with the \u003ccode\u003es3_path\u003c/code\u003e config option (for more detail, see \u003ca href=\"https://github.com/EleutherAI/gpt-neox/pull/1010\" data-hovercard-type=\"pull_request\" data-hovercard-url=\"/EleutherAI/gpt-neox/pull/1010/hovercard\"\u003ethe PR\u003c/a\u003e)\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003e[9/20/2023]\u003c/strong\u003e As of \u003ca class=\"issue-link js-issue-link\" data-error-text=\"Failed to load title\" data-id=\"1901736264\" data-permission-text=\"Title is private\" data-url=\"https://github.com/EleutherAI/gpt-neox/issues/1035\" data-hovercard-type=\"pull_request\" data-hovercard-url=\"/EleutherAI/gpt-neox/pull/1035/hovercard\" href=\"https://github.com/EleutherAI/gpt-neox/pull/1035\"\u003e#1035\u003c/a\u003e, we have deprecated Flash Attention 0.x and 1.x, and migrated support to Flash Attention 2.x. 
**[9/20/2023]** As of [#1035](https://github.com/EleutherAI/gpt-neox/pull/1035), we have deprecated Flash Attention 0.x and 1.x and migrated support to Flash Attention 2.x. We don't believe this will cause problems, but if you have a specific use case that requires old flash support with the latest GPT-NeoX, please raise an issue.
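If your setup depends on flash attention, a quick sanity check along these lines can confirm the installed package is on the 2.x series (assuming the `flash-attn` package is installed and exposes `__version__`, which recent releases do):

```python
# Illustrative sanity check: confirm the installed flash-attn package is on
# the 2.x series, since 0.x/1.x support was removed in #1035. Assumes the
# `flash-attn` package is installed and exposes __version__.
import flash_attn

major = int(flash_attn.__version__.split(".")[0])
assert major >= 2, f"flash-attn {flash_attn.__version__} found; GPT-NeoX expects 2.x"
print(f"flash-attn {flash_attn.__version__} detected")
```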
In order to migrate to the latest upstream DeepSpeed version while allowing users to access the old versions of GPT-NeoX and DeeperSpeed, we have introduced two versioned releases for both libraries:

- Version 2.0 of [GPT-NeoX](https://github.com/EleutherAI/gpt-neox/releases/tag/v2.0) and [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed/releases/tag/v2.0) are the latest versions built on the latest DeepSpeed, and will be maintained going forward.
- Version 1.0 of [GPT-NeoX](https://github.com/EleutherAI/gpt-neox/releases/tag/v1.0) and [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed/releases/tag/v1.0) maintain snapshots of the old stable versions that [GPT-NeoX-20B](https://arxiv.org/abs/2204.06745) and the [Pythia Suite](https://github.com/EleutherAI/pythia) were trained on.

# Contents

- [GPT-NeoX](#gpt-neox)
  - [Why GPT-NeoX?](#why-gpt-neox)
  - [News](#news)
  - [Versions](#versions)
- [Contents](#contents)
- [Quick Start](#quick-start)
  - [Environment and Dependencies](#environment-and-dependencies)
    - [Host Setup](#host-setup)
    - [Flash Attention](#flash-attention)
    - [Transformer Engine](#transformer-engine)
    - [Multi-Node Launching](#multi-node-launching)
    - [Containerized Setup](#containerized-setup)
  - [Usage](#usage)
- [Configuration](#configuration)
  - [Mixture of Experts](#mixture-of-experts)
- [Datasets](#datasets)
  - [Preconfigured Datasets](#preconfigured-datasets)
  - [Using Custom Data](#using-custom-data)
- [Training and Finetuning](#training-and-finetuning)
  - [Pretrained Models](#pretrained-models)
    - [GPT-NeoX-20B](#gpt-neox-20b)
    - [Pythia](#pythia)
    - [Polyglot](#polyglot)
- [Inference](#inference)
- [Evaluation](#evaluation)
- [Exporting to Hugging Face](#exporting-to-hugging-face)
- [Monitoring](#monitoring)
  - [Weights and Biases](#weights-and-biases)
  - [TensorBoard](#tensorboard)
- [Running on multi-node](#running-on-multi-node)
- [Profiling](#profiling)
- [Adoption and Publications](#adoption-and-publications)
  - [Publications](#publications)
  - [Models](#models)
    - [English LLMs](#english-llms)
    - [Non-English LLMs](#non-english-llms)
    - [Code Models](#code-models)
    - [Other Modalities](#other-modalities)
- [Administrative Notes](#administrative-notes)
  - [Citing GPT-NeoX](#citing-gpt-neox)
  - [Contributing](#contributing)
  - [Licensing](#licensing)
  - [Acknowledgements](#acknowledgements)
# Quick Start

## Environment and Dependencies

### Host Setup

This codebase has primarily been developed and tested with Python 3.8-3.10 and PyTorch 1.8-2.0. This is not a strict requirement, and other versions and combinations of libraries may work.

To install the remaining basic dependencies, run:

```bash
pip install -r requirements/requirements.txt
pip install -r requirements/requirements-wandb.txt # optional, if logging using WandB
pip install -r requirements/requirements-tensorboard.txt # optional, if logging via tensorboard
pip install -r requirements/requirements-comet.txt # optional, if logging via Comet
```

from the repository root.

> [!WARNING]
> Our codebase relies on [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed), our fork of the [DeepSpeed](https://github.com/microsoft/DeepSpeed) library with some added changes. We strongly recommend using Anaconda, a virtual machine, or some other form of environment isolation before continuing. Failure to do so may cause other repositories that rely on DeepSpeed to break.
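For example, a minimal sketch of setting up an isolated environment with Anaconda before installing (the environment name and exact Python version here are arbitrary choices, not project requirements):

```bash
# Create and activate an isolated environment so that DeeperSpeed does not
# clobber an existing system-wide DeepSpeed installation.
conda create -n gpt-neox python=3.10
conda activate gpt-neox

# Then install the dependencies listed above from the repository root.
pip install -r requirements/requirements.txt
```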
### Fused Kernels

We now support AMD GPUs (MI100, MI250X) through JIT fused-kernel compilation. Fused kernels will be built and loaded as needed. To avoid waiting during job launching, you can also pre-build them manually by running the following in a Python interpreter:

```python
from megatron.fused_kernels import load
load()
```

This automatically adapts the build process to different GPU vendors (AMD, NVIDIA) without platform-specific code changes. To further test fused kernels using `pytest`, use `pytest tests/model/test_fused_kernels.py`.

### Flash Attention

To use [Flash-Attention](https://github.com/HazyResearch/flash-attention), install the additional dependencies in `./requirements/requirements-flashattention.txt` or use a PyTorch NGC container with it pre-installed (note that functionality is not guaranteed using versions different from our requirements file). Then set the attention type in your configuration accordingly (see [configs](/EleutherAI/gpt-neox/blob/main/configs)). This can provide significant speed-ups over regular attention on certain GPU architectures, including Ampere GPUs (such as A100s); see the repository for more details.
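As a rough sketch of what that looks like in a config, the `attention_config` key is documented in configs/neox_arguments.md; the exact list layout and the use of `"all"` below are assumptions to check against that file rather than a drop-in setting:

```yaml
{
  # Assumed layout: [[[attention_type(s)], number_of_layers_or_"all"], ...];
  # "flash" selects the Flash Attention kernels for the listed layers.
  "attention_config": [[["flash"], "all"]],
}
```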
### Transformer Engine

To use [Transformer Engine (TE)](https://github.com/NVIDIA/TransformerEngine), install the additional dependencies in `./requirements/requirements-transformer-engine.txt` or use a PyTorch NGC container with it pre-installed (note that functionality is not guaranteed using versions different from our requirements file). See [this config](https://github.com/EleutherAI/gpt-neox/blob/main/configs/1-3B-transformer-engine.yml) for an example of using TE on a 1.3B model. This can provide significant speed-ups over regular attention on certain GPU architectures, including Ampere and Hopper GPUs; see the repository for more details.

TE provides very efficient kernels for both A100 and H100 GPUs. We've run some sample ablations on A100 and on H100 (plots not reproduced here).

### Multi-Node Launching

NeoX and Deep(er)Speed support training on multiple different nodes, and you have the option of using a variety of different launchers to orchestrate multi-node jobs.

In general there needs to be a "hostfile" somewhere accessible with the format:

```bash
node1_ip slots=8
node2_ip slots=8
```

where the first column contains the IP address for each node in your setup and the number of slots is the number of GPUs that node has access to. In your config you must pass in the path to the hostfile with `"hostfile": "/path/to/hostfile"`. Alternatively, the path to the hostfile can be in the environment variable `DLTS_HOSTFILE`.
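For example (the path is a placeholder), the config entry looks like:

```json
{
  "hostfile": "/path/to/hostfile"
}
```

or, equivalently, you can `export DLTS_HOSTFILE=/path/to/hostfile` in the environment that launches the job.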
#### pdsh

`pdsh` is the default launcher, and if you're using `pdsh` then all you must do (besides ensuring that pdsh is installed in your environment) is set `{"launcher": "pdsh"}` in your config files.

#### MPI

If using MPI then you must specify the MPI library (DeepSpeed/GPT-NeoX currently supports `mvapich`, `openmpi`, `mpich`, and `impi`, though `openmpi` is the most commonly used and tested) as well as pass the `deepspeed_mpi` flag in your config file:

```json
{
  "launcher": "openmpi",
  "deepspeed_mpi": true
}
```

With your environment properly set up and the correct configuration files, you can use `deepy.py` like a normal Python script and start (for example) a training job with:

`python3 deepy.py train.py /path/to/configs/my_model.yml`

#### Slurm

Using Slurm can be slightly more involved. Like with MPI, you must add the following to your config:

```json
{
  "launcher": "slurm",
  "deepspeed_slurm": true
}
```

If you do not have ssh access to the compute nodes in your Slurm cluster, you need to add `{"no_ssh_check": true}`.
#### (Advanced) Custom Launching

There are many cases where the above default launching options are not sufficient:

- Many clusters have their own unique job scheduler or specific MPI/Slurm arguments necessary for launching jobs, such as [Summit JSRun](https://docs.olcf.ornl.gov/systems/summit_user_guide.html#job-launcher-jsrun) or [LLNL Flux](https://computing.llnl.gov/projects/flux-building-framework-resource-management)
- While the above Slurm/MPI/pdsh default options are enough for most job runs, advanced users may want to add arguments for optimization or debugging purposes

In these cases, you will need to modify the DeepSpeed [multinode runner](https://github.com/microsoft/DeepSpeed/blob/17957728c0362bf8ae70feca308e491e55ef9feb/deepspeed/launcher/multinode_runner.py) utility to support your use case. Broadly, these enhancements fall under two categories:

##### 1. Adding a Launcher (e.g. [JSRun](https://docs.olcf.ornl.gov/systems/summit_user_guide.html#job-launcher-jsrun), [Flux](https://computing.llnl.gov/projects/flux-building-framework-resource-management), etc)

In this case, you must add a new multinode runner class to `deepspeed/launcher/multinode_runner.py` and expose it as a configuration option in GPT-NeoX. Examples of how we did this for [Summit JSRun](https://docs.olcf.ornl.gov/systems/summit_user_guide.html#job-launcher-jsrun) are in [this DeeperSpeed commit](https://github.com/EleutherAI/DeeperSpeed/commit/9aed6c8500d7c492d85c5c88687322dbda70e370) and [this GPT-NeoX commit](https://github.com/EleutherAI/gpt-neox/commit/3782c7ae60f8624e566e3879b89bb09e8b59b869), respectively.

##### 2. Modifying Run Command or Environment Variables

We have encountered many cases where we wish to modify the MPI/Slurm run command for an optimization or to debug (e.g. to modify the [Slurm srun CPU binding](https://slurm.schedmd.com/srun.html#OPT_cpu-bind) or to tag MPI logs with the rank). In this case, you must modify the multinode runner class's run command under its `get_cmd` method (e.g. [mpirun_cmd](https://github.com/microsoft/DeepSpeed/blob/17957728c0362bf8ae70feca308e491e55ef9feb/deepspeed/launcher/multinode_runner.py#L135-L147) for OpenMPI). Examples of how we did this to provide optimized and rank-tagged run commands using Slurm and OpenMPI for the Stability cluster are in [this DeeperSpeed branch](https://github.com/microsoft/DeepSpeed/compare/master...EleutherAI:DeeperSpeed:v2.0-stability). A rough sketch of the idea is shown below.
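As an illustration only: the snippet below extends the OpenMPI runner so that every output line is tagged with its rank, one of the tweaks mentioned above. The class name `OpenMPIRunner` and the `get_cmd` signature mirror the `multinode_runner.py` file linked above but may differ across DeepSpeed versions, so treat this as a hypothetical sketch rather than a tested patch.

```python
# Hypothetical sketch -- class/method names follow deepspeed/launcher/multinode_runner.py
# as linked above and may not match the DeepSpeed version you have installed.
from deepspeed.launcher.multinode_runner import OpenMPIRunner


class TaggedOpenMPIRunner(OpenMPIRunner):
    """OpenMPI runner that prefixes each output line with its MPI rank."""

    def get_cmd(self, environment, active_resources):
        cmd = super().get_cmd(environment, active_resources)
        # cmd is a flat argument list starting with the "mpirun" binary;
        # insert mpirun's --tag-output flag directly after it.
        return [cmd[0], "--tag-output"] + cmd[1:]
```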
#### Hostfile Generation

In general you will not be able to have a single fixed hostfile, so you need to have a script to generate one dynamically when your job starts. An example script to dynamically generate a hostfile using [Slurm](https://slurm.schedmd.com/documentation.html) and 8 GPUs per node is:

```bash
#!/bin/bash
GPUS_PER_NODE=8
mkdir -p /sample/path/to/hostfiles
# need to add the current slurm jobid to hostfile name so that we don't add to previous hostfile
hostfile=/sample/path/to/hostfiles/hosts_$SLURM_JOBID
# be extra sure we aren't appending to a previous hostfile
rm $hostfile &> /dev/null
# loop over the node names
for i in `scontrol show hostnames $SLURM_NODELIST`
do
  # add a line to the hostfile
  echo $i slots=$GPUS_PER_NODE >>$hostfile
done
```

`$SLURM_JOBID` and `$SLURM_NODELIST` are environment variables that Slurm creates for you. See the [sbatch documentation](https://slurm.schedmd.com/sbatch.html#SECTION_OUTPUT-ENVIRONMENT-VARIABLES) for a full list of available Slurm environment variables set at job creation time.

#### Job Launching

Then you can create an [sbatch](https://slurm.schedmd.com/sbatch.html) script from which to kick off your GPT-NeoX job. A bare-bones sbatch script on a Slurm-based cluster with 8 GPUs per node would look like this:

```bash
#!/bin/bash
#SBATCH --job-name="neox"
#SBATCH --partition=your-partition
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8

# Some potentially useful distributed environment variables
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=12802
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`

# Your hostfile creation script from above
./write_hostfile.sh
# Tell DeepSpeed where to find our generated hostfile via DLTS_HOSTFILE
export DLTS_HOSTFILE=/sample/path/to/hostfiles/hosts_$SLURM_JOBID

# Launch training
python3 deepy.py train.py /sample/path/to/your/configs/my_model.yml
```

You can then kick off a training run with `sbatch my_sbatch_script.sh`.
### Containerized Setup

We also provide a Dockerfile and docker-compose configuration if you prefer to run NeoX in a container.

Requirements to run the container are to have appropriate GPU drivers, an up-to-date installation of Docker, and [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed. To test if your installation is good you can use their "sample workload", which is:

```
docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
```

Provided that runs successfully, you need to export NEOX_DATA_PATH and NEOX_CHECKPOINT_PATH in your environment to specify your data directory and directory for storing and loading checkpoints:

```
export NEOX_DATA_PATH=/mnt/sda/data/enwiki8 # or wherever your data is stored on your system
export NEOX_CHECKPOINT_PATH=/mnt/sda/checkpoints
```

And then, from the gpt-neox directory, you can build the image and run a shell in a container with

```
docker compose run gpt-neox bash
```

After the build, you should be able to do this:

```
mchorse@537851ed67de:~$ echo $(pwd)
/home/mchorse
mchorse@537851ed67de:~$ ls -al
total 48
drwxr-xr-x  1 mchorse mchorse 4096 Jan  8 05:33 .
drwxr-xr-x  1 root    root    4096 Jan  8 04:09 ..
-rw-r--r--  1 mchorse mchorse  220 Feb 25  2020 .bash_logout
-rw-r--r--  1 mchorse mchorse 3972 Jan  8 04:09 .bashrc
drwxr-xr-x  4 mchorse mchorse 4096 Jan  8 05:35 .cache
drwx------  3 mchorse mchorse 4096 Jan  8 05:33 .nv
-rw-r--r--  1 mchorse mchorse  807 Feb 25  2020 .profile
drwxr-xr-x  2 root    root    4096 Jan  8 04:09 .ssh
drwxrwxr-x  8 mchorse mchorse 4096 Jan  8 05:35 chk
drwxrwxrwx  6 root    root    4096 Jan  7 17:02 data
drwxr-xr-x 11 mchorse mchorse 4096 Jan  8 03:52 gpt-neox
```

For a long-running job, you should run

```
docker compose up -d
```

to run the container in detached mode, and then, in a separate terminal session, run

```
docker compose exec gpt-neox bash
```

You can then run any job you want from inside the container.

Concerns when running for a long time or in detached mode include:

- You will have to terminate the container manually when you are no longer using it
- If you want processes to continue running when your shell session ends, you will need to background them.
- If you then want logging, you will have to make sure to pipe logs to disk, and set up wandb and/or Comet logging.

If you prefer to run the prebuilt container image from Docker Hub, you can run the docker compose commands with `-f docker-compose-dockerhub.yml` instead, e.g.,

```
docker compose run -f docker-compose-dockerhub.yml gpt-neox bash
```

## Usage

All functionality should be launched using `deepy.py`, a wrapper around the `deepspeed` launcher.

We currently offer three main functions:

1. `train.py` is used for training and finetuning models.
2. `eval.py` is used to evaluate a trained model using the [language model evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness).
3. `generate.py` is used to sample text from a trained model.

which can be launched with:

```bash
./deepy.py [script.py] [./path/to/config_1.yml] [./path/to/config_2.yml] ... [./path/to/config_n.yml]
```

For example, to launch training you can run

```bash
./deepy.py train.py ./configs/20B.yml ./configs/local_cluster.yml
```
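The other two entry points are launched the same way. As an illustrative sketch (the config file names and the `--eval_tasks` flag here are examples; see the [Inference](#inference) and [Evaluation](#evaluation) sections below for the authoritative invocations):

```bash
# Sample text from a trained model (config names are illustrative)
./deepy.py generate.py ./configs/20B.yml ./configs/text_generation.yml

# Evaluate a trained model on downstream tasks via the evaluation harness
./deepy.py eval.py ./configs/20B.yml --eval_tasks lambada hellaswag
```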
For more details on each entry point, see the [Training and Finetuning](#training-and-finetuning), [Inference](#inference), and [Evaluation](#evaluation) sections, respectively.

# Configuration

GPT-NeoX parameters are defined in a YAML configuration file which is passed to the deepy.py launcher. We have provided some example .yml files in [configs](/EleutherAI/gpt-neox/blob/main/configs), showing a diverse array of features and model sizes.

These files are generally complete, but non-optimal. For example, depending on your specific GPU configuration, you may need to change some settings such as `pipe-parallel-size` and `model-parallel-size` to increase or decrease the degree of parallelisation, `train_micro_batch_size_per_gpu` or `gradient-accumulation-steps` to modify batch size related settings, or the `zero_optimization` dict to modify how optimizer states are parallelised across workers.
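As a purely illustrative sketch, such overrides sit alongside the rest of the model definition in your .yml file; the values below are placeholders to adapt to your own hardware, not recommendations:

```yaml
{
  # degree of pipeline / tensor (model) parallelism
  "pipe-parallel-size": 1,
  "model-parallel-size": 2,

  # batch-size related settings
  "train_micro_batch_size_per_gpu": 4,
  "gradient_accumulation_steps": 8,

  # how optimizer states are partitioned across workers
  "zero_optimization": {
    "stage": 1
  }
}
```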
8 is a reasonable value.\nmoe_loss_coeff: 0.1\nexpert_interval: 2 # See details below\nenable_expert_tensor_parallelism: false # See details below\nmoe_expert_parallel_size: 1 # See details below\nmoe_token_dropping: false\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eDeepSpeed can be further configured with the following:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"moe_top_k: 1\nmoe_min_capacity: 4\nmoe_train_capacity_factor: 1.0 # Setting to 1.0\nmoe_eval_capacity_factor: 1.0 # Setting to 1.0\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003emoe_top_k: 1\nmoe_min_capacity: 4\nmoe_train_capacity_factor: 1.0 # Setting to 1.0\nmoe_eval_capacity_factor: 1.0 # Setting to 1.0\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eOne MoE layer is present every \u003ccode\u003eexpert_interval\u003c/code\u003e transformer layers including the first, so with 12 layers total:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eExperts would be in these layers:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"0, 2, 4, 6, 8, 10\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e0, 2, 4, 6, 8, 10\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBy default, we use expert-data parallelism, so any available tensor parallelism (\u003ccode\u003emodel_parallel_size\u003c/code\u003e) will be used for expert routing. For instance, given the following:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"expert_parallel_size: 4\nmodel_parallel_size: 2 # aka tensor parallelism\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003eexpert_parallel_size: 4\nmodel_parallel_size: 2 # aka tensor parallelism\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWith 32 GPUs, the behavior will be look like:\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eIn non-expert layers:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eTensor parallelism is 2. (There are 32 / 2 = 16 such tensor parallel groups, each of size 2.)\u003c/li\u003e\n\u003cli\u003eData parallelism implicitly becomes 32 / 2 = 16.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003eIn expert layers:\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eThere is no tensor parallelism.\u003c/li\u003e\n\u003cli\u003eExpert parallelism is 4. (There are 32 / 4 = 8 expert parallel groups, each of size 4.)\u003c/li\u003e\n\u003cli\u003eData parallelism implicitly becomes 32 / 4 = 8. Some cross-node token routing happens as a result of this redivision of data parallelism between 16 and 8. To avoid it, ensure that \u003ccode\u003eexpert_parallel_size == model_parallel_size\u003c/code\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eSetting \u003ccode\u003eenable_expert_tensor_parallelism\u003c/code\u003e enables tensor-expert-data (TED) parallelism. 
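A minimal sketch of that fix, in the same style as the snippet above (the degree of 2 is illustrative):

```yaml
# Hypothetical sketch: matching the two degrees keeps the implicit data-parallel
# size identical in expert and non-expert layers, so the regrouping above
# (16 vs. 8) and the cross-node token routing it causes are avoided.
expert_parallel_size: 2
model_parallel_size: 2 # aka tensor parallelism
```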
Setting `enable_expert_tensor_parallelism` enables tensor-expert-data (TED) parallelism. The way to interpret the above would then be:

- In non-expert layers: same as before.
- In expert layers:
  - Tensor parallelism is 2. (There are 32 / 2 = 16 tensor parallel groups, each of size 2.)
  - Expert parallelism is 4. (There are 32 / 4 = 8 expert parallel groups, each of size 4.)
  - Data parallelism implicitly becomes 32 / (2 * 4) = 4. Again, cross-node token routing happens. To avoid it, ensure `expert_parallel_size == 1` or `model_parallel_size == 1`.

Note that DP must be divisible by (MP * EP). For more details, see the [TED paper](https://arxiv.org/abs/2303.06318).

Pipeline parallelism is not yet supported - coming soon!

# Datasets

## Preconfigured Datasets

Several preconfigured datasets are available, including most components from [the Pile](https://arxiv.org/abs/2101.00027), as well as the Pile train set itself, for straightforward tokenization using the `prepare_data.py` entry point.
E.g., to download and tokenize the enwik8 dataset with the GPT2 Tokenizer, saving the tokenized files to `./data`, you can run:

```
python prepare_data.py -d ./data
```

or a single shard of the pile (`pile_subset`) with the GPT-NeoX-20B tokenizer (assuming you have it saved at `./20B_checkpoints/20B_tokenizer.json`):

```
python prepare_data.py -d ./data -t HFTokenizer --vocab-file ./20B_checkpoints/20B_tokenizer.json pile_subset
```

The tokenized data will be saved out to two files: `[data-dir]/[dataset-name]/[dataset-name]_text_document.bin` and `[data-dir]/[dataset-name]/[dataset-name]_text_document.idx`. You will need to add the prefix that both these files share to your training configuration file under the `data-path` field. E.g.:

```yaml
  "data-path": "./data/enwik8/enwik8_text_document",
```

## Using Custom Data

To prepare your own dataset for training with custom data, format it as one large [jsonl](https://jsonlines.org/)-formatted file with each item in the list of dictionaries being a separate document. The document text should be grouped under one JSON key, i.e. `"text"`. Any auxiliary data stored in other fields will not be used.
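A two-line sketch of such a file (the documents are hypothetical; the `"meta"` field is shown only to illustrate that extra keys are ignored):

```
{"text": "First document, written out in full as a single JSON string."}
{"text": "Second document.", "meta": {"source": "example"}}
```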
Next make sure to download the GPT2 tokenizer vocab and merge files from the following links:

- Vocab: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json
- Merge: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt

Or use the 20B tokenizer (for which only a single Vocab file is needed):

- Vocab: https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/20B_tokenizer.json

(alternatively, you can provide any tokenizer file that can be loaded by Hugging Face's tokenizers library with the `Tokenizer.from_pretrained()` command)

You can now pretokenize your data using `tools/datasets/preprocess_data.py`, the arguments for which are detailed below:

```
usage: preprocess_data.py [-h] --input INPUT [--jsonl-keys JSONL_KEYS [JSONL_KEYS ...]] [--num-docs NUM_DOCS] --tokenizer-type {HFGPT2Tokenizer,HFTokenizer,GPT2BPETokenizer,CharLevelTokenizer} [--vocab-file VOCAB_FILE] [--merge-file MERGE_FILE] [--append-eod] [--ftfy] --output-prefix OUTPUT_PREFIX
                          [--dataset-impl {lazy,cached,mmap}] [--workers WORKERS] [--log-interval LOG_INTERVAL]

optional arguments:
  -h, --help            show this help message and exit

input data:
  --input INPUT         Path to input jsonl files or lmd archive(s) - if using multiple archives, put them in a comma separated list
  --jsonl-keys JSONL_KEYS [JSONL_KEYS ...]
                        space separate listed of keys to extract from jsonl. Default: text
  --num-docs NUM_DOCS   Optional: Number of documents in the input data (if known) for an accurate progress bar.

tokenizer:
  --tokenizer-type {HFGPT2Tokenizer,HFTokenizer,GPT2BPETokenizer,CharLevelTokenizer}
                        What type of tokenizer to use.
  --vocab-file VOCAB_FILE
                        Path to the vocab file
  --merge-file MERGE_FILE
                        Path to the BPE merge file (if necessary).
  --append-eod          Append an <eod> token to the end of a document.
  --ftfy                Use ftfy to clean text

output data:
  --output-prefix OUTPUT_PREFIX
                        Path to binary output file without suffix
  --dataset-impl {lazy,cached,mmap}
                        Dataset implementation to use. Default: mmap

runtime:
  --workers WORKERS     Number of worker processes to launch
  --log-interval LOG_INTERVAL
                        Interval between progress updates
```

For example:

```bash
python tools/datasets/preprocess_data.py \
            --input ./data/mydataset.jsonl.zst \
            --output-prefix ./data/mydataset \
            --vocab-file ./data/gpt2-vocab.json \
            --merge-file gpt2-merges.txt \
            --dataset-impl mmap \
            --tokenizer-type GPT2BPETokenizer \
            --append-eod
```

You would then run training with the following settings added to your configuration file:

```yaml
  "data-path": "data/mydataset_text_document",
```
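If you tokenized with the GPT2 tokenizer as in the example above, the training configuration will generally also need to point at the same tokenizer files. The key names below are an assumption modeled on the style of `configs/local_setup.yml`; check `configs/neox_arguments.md` for the exact spellings:

```yaml
# Hypothetical sketch -- pair the data prefix with the tokenizer files that
# were used during preprocessing.
  "data-path": "data/mydataset_text_document",
  "vocab-file": "data/gpt2-vocab.json",
  "merge-file": "data/gpt2-merges.txt",
  "tokenizer-type": "GPT2BPETokenizer",
```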
# Training and Finetuning

Training is launched using `deepy.py`, a wrapper around DeepSpeed's launcher, which launches the same script in parallel across many GPUs / nodes.

The general usage pattern is:

```bash
python ./deepy.py train.py [path/to/config1.yml] [path/to/config2.yml] ...
```

You can pass in an arbitrary number of configs which will all be merged at runtime.

You can also optionally pass in a config prefix, which will assume all your configs are in the same folder and append that prefix to their path.

For example:

```bash
python ./deepy.py train.py -d configs 125M.yml local_setup.yml
```

This will deploy the `train.py` script on all nodes with one process per GPU.
The worker nodes and number of GPUs are specified in the `/job/hostfile` file (see [parameter documentation](/EleutherAI/gpt-neox/blob/main/configs/README.md)), or can simply be passed in as the `num_gpus` arg if running on a single-node setup.

Although this is not strictly necessary, we find it useful to define the model parameters in one config file (e.g. `configs/125M.yml`) and the data path parameters in another (e.g. `configs/local_setup.yml`).

## Pretrained Models

### GPT-NeoX-20B

GPT-NeoX-20B is a 20 billion parameter autoregressive language model trained on [the Pile](https://arxiv.org/abs/2101.00027). Technical details about GPT-NeoX-20B can be found in [the associated paper](https://arxiv.org/abs/2204.06745).
The configuration file for this model is available both at [`./configs/20B.yml`](/EleutherAI/gpt-neox/blob/main/configs/20B.yml) and in the download links below.

[Slim weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/) - (No optimizer states, for inference or finetuning, 39GB)

To download from the command line to a folder named `20B_checkpoints`, use the following command:

```bash
wget --cut-dirs=5 -nH -r --no-parent --reject "index.html*" https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/ -P 20B_checkpoints
```

[Full weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights/) - (Including optimizer states, 268GB)

To download from the command line to a folder named `20B_checkpoints`, use the following command:

```bash
wget --cut-dirs=5 -nH -r --no-parent --reject "index.html*" https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights/ -P 20B_checkpoints
```

Weights can alternatively be downloaded using a BitTorrent client. Torrent files can be downloaded here: [slim weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights.torrent), [full weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights.torrent).

We additionally have 150 checkpoints saved throughout training, one every 1,000 steps.
We are working on figuring out how to best serve these at scale, but in the meantime people interested in working with the partially trained checkpoints can email us at [contact@eleuther.ai](mailto:contact@eleuther.ai) to arrange access.

### Pythia

The Pythia Scaling Suite is a suite of models ranging from 70M parameters to 12B parameters trained on [the Pile](https://pile.eleuther.ai) intended to promote research on interpretability and training dynamics of large language models. Further details about the project and links to the models can be found in [the paper](https://arxiv.org/abs/2304.01373) and [on the project's GitHub](https://github.com/EleutherAI/pythia).

### Polyglot

The Polyglot Project is an effort to train powerful non-English pretrained language models to promote the accessibility of this technology to researchers outside the dominant powerhouses of machine learning. EleutherAI has trained and released 1.3B, 3.8B, and 5.8B parameter Korean language models, the largest of which outperforms all other publicly available language models on Korean language tasks.
Further details about the project and links to the models can be found [here](https://github.com/EleutherAI/polyglot).

# Inference

**For most uses we recommend deploying models trained using the GPT-NeoX library via the Hugging Face Transformers library, which is better optimized for inference.**

We support three types of generation from a pretrained model:

1. Unconditional generation
2. Conditional generation based on an input read from a file
3. Interactive generation, which allows for multiple rounds of back-and-forth between a user and the language model via a command line interface

All three types of text generation can be launched via `python ./deepy.py generate.py -d configs 125M.yml local_setup.yml text_generation.yml` with the appropriate values set in `configs/text_generation.yml`.
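As a rough illustration of the kind of values set there, the generation mode and sampling parameters might look like the sketch below. The field names are assumptions modeled on the shipped `configs/text_generation.yml`, so verify them against that file before use:

```yaml
# Illustrative sketch only -- configs/text_generation.yml is the real example.
  "text-gen-type": "unconditional",          # or "input-file" / "interactive"
  "maximum_tokens": 102,
  "temperature": 1.0,
  "num-samples": 10,
  "sample-input-file": "sample_input.txt",   # read when using input-file generation
  "sample-output-file": "sample_output.txt",
```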
# Evaluation

GPT-NeoX supports evaluation on downstream tasks through the [language model evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness).

To evaluate a trained model on the evaluation harness, simply run:

```bash
python ./deepy.py eval.py -d configs your_configs.yml --eval_tasks task1 task2 ... taskn
```

where `--eval_tasks` is a space-separated list of evaluation tasks, e.g. `--eval_tasks lambada hellaswag piqa sciq`. For details of all tasks available, refer to the [lm-evaluation-harness repo](https://github.com/EleutherAI/lm-evaluation-harness).

# Exporting to Hugging Face

GPT-NeoX is optimized heavily for training only, and GPT-NeoX model checkpoints are not compatible out of the box with other deep learning libraries.
To make models easily loadable and shareable with end users, and for further exporting to various other frameworks, GPT-NeoX supports checkpoint conversion to the [Hugging Face Transformers](https://arxiv.org/abs/1910.03771) format.

Though NeoX supports a number of different architectural configurations, including AliBi positional embeddings, not all of these configurations map cleanly onto the supported configurations within Hugging Face Transformers.

NeoX supports export of compatible models into the following architectures:

- GPTNeoXForCausalLM
- LlamaForCausalLM
- MistralForCausalLM

Training a model which does not fit into one of these Hugging Face Transformers architectures cleanly will require writing custom modeling code for the exported model.

To convert a GPT-NeoX library checkpoint to Hugging Face-loadable format, run:

```bash
python ./tools/ckpts/convert_neox_to_hf.py --input_dir /path/to/model/global_stepXXX --config_file your_config.yml --output_dir hf_model/save/location --precision {auto,fp16,bf16,fp32} --architecture {neox,mistral,llama}
```

Then to upload a model to [the Hugging Face Hub](https://huggingface.co/), run:

```bash
huggingface-cli login
python ./tools/ckpts/upload.py
```

and input the requested information, including your Hugging Face Hub user token.

### Importing Models Into GPT-NeoX
NeoX supplies several utilities for converting a pretrained model checkpoint into a format that can be trained within the library.

The following models or model families can be loaded in GPT-NeoX:

- Llama 1
- Llama 2
- CodeLlama
- Mistral-7b-v0.1

We provide two utilities for converting from two different checkpoint formats into a format compatible with GPT-NeoX.

To convert a Llama 1 or Llama 2 checkpoint distributed by Meta AI from its original file format (downloadable [here](https://github.com/facebookresearch/llama) or [here](https://huggingface.co/meta-llama/Llama-2-7b)) into the GPT-NeoX library, run

```
python tools/ckpts/convert_raw_llama_weights_to_neox.py --input_dir /path/to/model/parent/dir/7B --model_size 7B --output_dir /path/to/save/ckpt --num_output_shards <TENSOR_PARALLEL_SIZE> (--pipeline_parallel if pipeline-parallel-size >= 1)
```

To convert from a Hugging Face model into a NeoX-loadable format, run `tools/ckpts/convert_hf_to_sequential.py`.
See documentation within that file for further options.

# Monitoring

In addition to storing logs locally, we provide built-in support for three popular experiment monitoring frameworks: [Weights & Biases](https://wandb.ai/site), [TensorBoard](https://www.tensorflow.org/tensorboard/), and [Comet](https://www.comet.com/site).

## Weights and Biases

[Weights & Biases](https://wandb.ai/eleutherai/neox) is the machine learning monitoring platform we use to record our experiments. To use wandb to monitor your gpt-neox experiments:

1. Create an account at https://wandb.ai/site to generate your API key.
2. Log into Weights & Biases on your machine by executing `wandb login`; your runs will automatically be recorded.
3. Dependencies required for wandb monitoring can be found in and installed from `./requirements/requirements-wandb.txt`.
4. There are two optional fields associated with Weights & Biases: `wandb_group` allows you to name the run group and `wandb_team` allows you to assign your runs to an organization or team account, as sketched after this list. An example config is provided in `./configs/local_setup_wandb.yml`.
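A minimal sketch of those two optional fields (the values are illustrative; `./configs/local_setup_wandb.yml` remains the authoritative example):

```yaml
# Illustrative values only -- see ./configs/local_setup_wandb.yml.
  "wandb_group": "neox-experiments",
  "wandb_team": "my-team",
```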
## TensorBoard

We support using TensorBoard via the `tensorboard-dir` field. Dependencies required for TensorBoard monitoring can be found in and installed from `./requirements/requirements-tensorboard.txt`.
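For example, writing event files to a local directory could look like this one-line sketch (the path is illustrative):

```yaml
# Illustrative path -- TensorBoard event files are written under this directory.
  "tensorboard-dir": "tensorboard",
```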
## Comet

[Comet](https://www.comet.com/site) is a machine learning monitoring platform. To use Comet to monitor your gpt-neox experiments:

1. Create an account at https://www.comet.com/login to generate your API key.
2. Once generated, link your API key at runtime by running `comet login` or by setting `export COMET_API_KEY=<your-key-here>`.
3. Install `comet_ml` and any dependency libraries via `pip install -r requirements/requirements-comet.txt`.
4. Enable Comet with `use_comet: True`. You can also customize where data is being logged with `comet_workspace` and `comet_project` (see the sketch after this list). A full example config with comet enabled is provided in `configs/local_setup_comet.yml`.
5. Run your experiment, and monitor metrics in the Comet workspace that you passed!
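A minimal sketch of those fields (the workspace and project names are hypothetical; `configs/local_setup_comet.yml` is the complete example):

```yaml
# Illustrative values only -- see configs/local_setup_comet.yml.
  "use_comet": True,
  "comet_workspace": "my-workspace",   # optional
  "comet_project": "gpt-neox-runs",    # optional
```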
# Running on multi-node

If you need to supply a hostfile for use with the MPI-based DeepSpeed launcher, you can set the environment variable `DLTS_HOSTFILE` to point to the hostfile.

# Profiling

We support profiling with Nsight Systems, the PyTorch Profiler, and PyTorch Memory Profiling.

## Nsight Systems Profiling

To use Nsight Systems profiling, set config options `profile`, `profile_step_start`, and `profile_step_stop` (see [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md) for argument usage, and [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml) for a sample config).

To populate nsys metrics, launch training with:

```
nsys profile -s none -t nvtx,cuda -o <path/to/profiling/output> --force-overwrite true \
--capture-range=cudaProfilerApi --capture-range-end=stop python $TRAIN_PATH/deepy.py \
$TRAIN_PATH/train.py --conf_dir configs <config files>
```

The generated output file can then be viewed with the Nsight Systems GUI:

![nsight-prof](/EleutherAI/gpt-neox/raw/main/images/nsight_profiling.png)
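The same three config options also drive the PyTorch profiler described below. A minimal sketch (the step numbers are illustrative; `configs/prof.yml` is the shipped sample):

```yaml
# Illustrative values -- profile only a short window of training steps.
  "profile": true,
  "profile_step_start": 10,
  "profile_step_stop": 12,
```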
## PyTorch Profiling

To use the built-in PyTorch profiler, set config options `profile`, `profile_step_start`, and `profile_step_stop` (see [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md) for argument usage, and [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml) for a sample config).

The PyTorch profiler will save traces to your `tensorboard` log directory. You can view these traces within
TensorBoard by following the steps [here](https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html).

![torch-prof](/EleutherAI/gpt-neox/raw/main/images/pytorch_profiling.png)

## PyTorch Memory Profiling

To use PyTorch Memory Profiling, set config options `memory_profiling` and `memory_profiling_path` (see [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md) for argument usage, and [here](https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml) for a sample config).
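The analogous sketch for memory profiling (the output path is illustrative; again, see `configs/prof.yml` for the shipped sample):

```yaml
# Illustrative values -- write the generated memory profile to this path.
  "memory_profiling": true,
  "memory_profiling_path": "memory_profile_output",
```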
href=\"https://github.com/pytorch/pytorch/blob/main/torch/cuda/_memory_viz.py\"\u003ememory_viz.py\u003c/a\u003e script. Run with:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"python _memory_viz.py trace_plot \u0026lt;generated_profile\u0026gt; -o trace.html\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003epython _memory_viz.py trace_plot \u0026lt;generated_profile\u0026gt; -o trace.html\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAdoption and Publications\u003c/h1\u003e\u003ca id=\"user-content-adoption-and-publications\" class=\"anchor\" aria-label=\"Permalink: Adoption and Publications\" href=\"#adoption-and-publications\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe GPT-NeoX library was been widely adopted by academic and industry researchers and ported on to many HPC systems.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIf you have found this library useful in your research, please reach out and let us know! 
We would love to add you to our lists.

## Publications

EleutherAI and our collaborators have used this library in the following publications:

- **Sid Black**, **Stella Biderman**, **Eric Hallahan**, **Quentin Anthony**, **Leo Gao**, **Laurence Golding**, **Horace He**, **Connor Leahy**, **Kyle McDonell**, **Jason Phang**, **Michael Pieler**, **Shivanshu Purohit**, **Laria Reynolds**, **Jon Tow**, **Ben Wang**, and **Samuel Weinbach**. "[GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745)." In *Proceedings of the ACL Workshop on Challenges & Perspectives in Creating Large Language Models*, 2022.
- **Stella Biderman**, **Hailey Schoelkopf**, **Quentin Anthony**, **Herbie Bradley**, **Kyle O'Brien**, **Eric Hallahan**, **Mohammad Aflah Khan**, **Shivanshu Purohit**, **USVSN Sai Prashanth**, Edward Raff, **Aviya Skowron**, **Lintang Sutawika**, and **Oskar van der Wal**. "[Pythia: A suite for analyzing large language models across training and scaling](https://arxiv.org/abs/2304.01373)." In *International Conference on Machine Learning*, pp. 2397-2430. PMLR, 2023.
\"\u003ca href=\"https://arxiv.org/abs/2302.12433\" rel=\"nofollow\"\u003eProofnet: Autoformalizing and formally proving undergraduate-level mathematics\u003c/a\u003e. \u003cem\u003earXiv preprint arXiv:2302.12433\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eStella Biderman\u003c/strong\u003e, \u003cstrong\u003eUSVSN Sai Prashanth\u003c/strong\u003e, \u003cstrong\u003eLintang Sutawika\u003c/strong\u003e, \u003cstrong\u003eHailey Schoelkopf\u003c/strong\u003e, \u003cstrong\u003eQuentin Anthony\u003c/strong\u003e, \u003cstrong\u003eShivanshu Purohit\u003c/strong\u003e, and Edward Raff. \"\u003ca href=\"https://arxiv.org/abs/2304.11158\" rel=\"nofollow\"\u003eEmergent and predictable memorization in large language models.\u003c/a\u003e\" In \u003cem\u003eNeural Information Processing Systems\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eHyunwoong Ko\u003c/strong\u003e, \u003cstrong\u003eKichang Yang\u003c/strong\u003e, \u003cstrong\u003eMinho Ryu\u003c/strong\u003e, \u003cstrong\u003eTaekyoon Choi\u003c/strong\u003e, \u003cstrong\u003eSeungmu Yang,\u003c/strong\u003e and Sungho Park. \"\u003ca href=\"https://arxiv.org/abs/2306.02254\" rel=\"nofollow\"\u003eA Technical Report for Polyglot-Ko: Open-Source Large-Scale Korean Language Models\u003c/a\u003e.\" \u003cem\u003earXiv preprint arXiv:2306.02254\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eKshitij Gupta, Benjamin Thérien, Adam Ibrahim, Mats Leon Richter, \u003cstrong\u003eQuentin Anthony\u003c/strong\u003e, Eugene Belilovsky, Irina Rish, and Timothée Lesort. \"\u003ca href=\"https://arxiv.org/abs/2308.04014\" rel=\"nofollow\"\u003eContinual Pre-Training of Large Language Models: How to re-warm your model?\u003c/a\u003e\" In \u003cem\u003eWorkshop on Efficient Systems for Foundation Models @ ICML\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eZhangir Azerbayev\u003c/strong\u003e, \u003cstrong\u003eHailey Schoelkopf\u003c/strong\u003e, Keiran Paster, Marco Dos Santos, Stephen McAleer, Albert Q Jiang, Jia Deng, \u003cstrong\u003eStella Biderman\u003c/strong\u003e, and Sean Welleck. \"Llemma: An open language model for mathematics\" In \u003cem\u003eMath-AI Workshop @ NeurIPS\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eAlexander Havrilla, Maksym Zhuravinskyi, Duy Phung, Aman Tiwari, Jonathan Tow, \u003cstrong\u003eStella Biderman\u003c/strong\u003e, \u003cstrong\u003eQuentin Anthony\u003c/strong\u003e, and \u003cstrong\u003eLouis Castricato\u003c/strong\u003e. \"\u003ca href=\"https://aclanthology.org/2023.emnlp-main.530/\" rel=\"nofollow\"\u003etrlX: A Framework for Large Scale Reinforcement Learning from Human Feedback\u003c/a\u003e.\" In \u003cem\u003eProceedings of the 2023 Conference on Empirical Methods in Natural Language Processing\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eQuentin Anthony\u003c/strong\u003e, \u003cstrong\u003eJacob Hatef\u003c/strong\u003e, Deepak Narayanan, \u003cstrong\u003eStella Biderman\u003c/strong\u003e, Stas Bekman, Junqi Yin, Aamir Shafi, Hari Subramoni, and Dhabaleswar Panda. \"\u003ca href=\"https://arxiv.org/abs/2401.14489\" rel=\"nofollow\"\u003eThe Case for Co-Designing Model Architectures with Hardware\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003cli\u003eAdam Ibrahim, Benjamin Thérien, Kshitij Gupta, Mats L. Richter, \u003cstrong\u003eQuentin Anthony\u003c/strong\u003e, Timothée Lesort, Eugene Belilovsky, Irina Rish. 
\"\u003ca href=\"https://arxiv.org/abs/2403.08763\" rel=\"nofollow\"\u003eSimple and Scalable Strategies to Continually Pre-train Large Language Models\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003cli\u003eJunqi Yin, Avishek Bose, Guojing Cong, Isaac Lyngaas, \u003cstrong\u003eQuentin Anthony\u003c/strong\u003e. \"\u003ca href=\"https://arxiv.org/abs/2402.00691\" rel=\"nofollow\"\u003eComparative Study of Large Language Model Architectures on Frontier\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eThe following publications by other research groups use this library:\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eTa-Chung Chi, Ting-Han Fan, Peter J. Ramadge, and Alexander Rudnicky. \"\u003ca href=\"https://arxiv.org/abs/2205.09921\" rel=\"nofollow\"\u003eKERPLE: Kernelized Relative Positional Embedding for Length Extrapolation\u003c/a\u003e.\" In \u003cem\u003eAdvances in Neural Information Processing Systems\u003c/em\u003e 35, 2022.\u003c/li\u003e\n\u003cli\u003eSameera Horawalavithana, Ellyn Ayton, Shivam Sharma, Scott Howland, Megha Subramanian, Scott Vasquez, Robin Cosbey, Maria Glenski, and Svitlana Volkova. \"\u003ca href=\"https://aclanthology.org/2022.bigscience-1.12/\" rel=\"nofollow\"\u003eFoundation Models of Scientific Knowledge for Chemistry: Opportunities, Challenges and Lessons Learned\u003c/a\u003e.\" In \u003cem\u003eProceedings of the ACL Workshop on Challenges \u0026amp; Perspectives in Creating Large Language Models\u003c/em\u003e, 2022.\u003c/li\u003e\n\u003cli\u003eSophia Kolak, Ruben Martins, Claire Le Goues, and Vincent J. Hellendoorn. \"\u003ca href=\"https://par.nsf.gov/biblio/10340618\" rel=\"nofollow\"\u003ePatch Generation with Language Models: Feasibility and Scaling Behavior\u003c/a\u003e\".\" In \u003cem\u003eProceedings of the Deep Learning for Code Workshop at ICLR\u003c/em\u003e, 2022.\u003c/li\u003e\n\u003cli\u003eFrank F. Xu, Uri Alon, Graham Neubig, and Vincent J. Hellendoorn. \"\u003ca href=\"https://arxiv.org/abs/2202.13169\" rel=\"nofollow\"\u003eA Systematic Evaluation of Large Language Models of Code\u003c/a\u003e.\" In \u003cem\u003eProceedings of the ICLR Workshop on Deep Learning For Code\u003c/em\u003e, 2022.\u003c/li\u003e\n\u003cli\u003eByung-Doh Oh and William Schuler. \"\u003ca href=\"https://arxiv.org/abs/2304.11389\" rel=\"nofollow\"\u003eTransformer-Based LM Surprisal Predicts Human Reading Times Best with About Two Billion Training Tokens\u003c/a\u003e.\" In \u003cem\u003eFindings of the Association for Computational Linguistics\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eTa-Chung Chi, Ting-Han Fan, Alexander Rudnicky, and Peter Ramadge. \"\u003ca href=\"https://aclanthology.org/2023.acl-long.756/\" rel=\"nofollow\"\u003eDissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis\u003c/a\u003e.\" In \u003cem\u003eProceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)\u003c/em\u003e, pp. 13522-13537, 2023.\u003c/li\u003e\n\u003cli\u003eTa-Chung Chi, Ting-Han Fan, Li-Wei Chen, Alexander Rudnicky, and Peter Ramadge. 
\"\u003ca href=\"https://aclanthology.org/2023.acl-short.102/\" rel=\"nofollow\"\u003eLatent Positional Information is in the Self-Attention Variance of Transformer Language Models Without Positional Embeddings\u003c/a\u003e.\" In \u003cem\u003eProceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)\u003c/em\u003e, pp. 13522-13537, 2023.\u003c/li\u003e\n\u003cli\u003eXidong Feng, Yicheng Luo, Ziyan Wang, Hongrui Tang, Mengyue Yang, Kun Shao, David Mguni, Yali Du, and Jun Wang. \"\u003ca href=\"https://arxiv.org/abs/2306.09200\" rel=\"nofollow\"\u003eChessGPT: Bridging Policy Learning and Language Modeling.\u003c/a\u003e\" \u003cem\u003earXiv preprint arXiv:2306.09200\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eOrion Walker Dollar, Sameera Horawalavithana, Scott Vasquez, W. James Pfaendtner, and Svitlana Volkova. \"\u003ca href=\"https://openreview.net/pdf?id=7UudBVsIrr\" rel=\"nofollow\"\u003eMolJET: Multimodal Joint Embedding Transformer for Conditional de novo Molecular Design and Multi-Property Optimization.\u003c/a\u003e\" \u003cem\u003epreprint under review\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eJean Kaddour and Qi Liu. \"\u003ca href=\"https://arxiv.org/abs/2310.01119\" rel=\"nofollow\"\u003eText Data Augmentation in Low-Resource Settings via Fine-Tuning of Large Language Models\u003c/a\u003e.\" \u003cem\u003earXiv:2310.01119\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eAlon Albalak, Liangming Pan, Colin Raffel, and William Yang Wang. \"\u003ca href=\"https://arxiv.org/abs/2312.02406\" rel=\"nofollow\"\u003eEfficient Online Data Mixing For Language Model Pre-Training\u003c/a\u003e.\" In \u003cem\u003eNeurIPS Workshop on R0-FoMo: Robustness of Few-shot and Zero-shot Learning in Large Foundation Models\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eEghbal A. Hosseini and Evelina Fedorenko. \"\u003ca href=\"https://www.biorxiv.org/content/10.1101/2023.11.05.564832v1\" rel=\"nofollow\"\u003eLarge language models implicitly learn to straighten neural sentence trajectories to construct a predictive representation of natural language\u003c/a\u003e.\" In \u003cem\u003eNeural Information Processing Systems\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eJunqi Yin, Sajal Dash, Feiyi Wang, and Mallikarjun Shankar. \"\u003ca href=\"https://dl.acm.org/doi/abs/10.1145/3581784.3613215\" rel=\"nofollow\"\u003eFORGE: Pre-Training Open Foundation Models for Science\u003c/a\u003e. In \u003cem\u003eProceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis\u003c/em\u003e, 1-13, 2023.\u003c/li\u003e\n\u003cli\u003eJean Kaddour and Qi Liu. \"\u003ca href=\"https://arxiv.org/abs/2310.01119\" rel=\"nofollow\"\u003eText Data Augmentation in Low-Resource Settings via Fine-Tuning of Large Language Models\u003c/a\u003e.\" In \u003cem\u003earXiv preprint arXiv:2310.01119\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003ePeng Di, Jianguo Li, Hang Yu, Wei Jiang, Wenting Cai, Yang Cao, Chaoyu Chen, Dajun Chen, Hongwei Chen, Liang Chen, Gang Fan, Jie Gong, Zi Gong, Wen Hu, Tingting Guo, Zhichao Lei, Ting Li, Zheng Li, Ming Liang, Cong Liao, Bingchang Liu, Jiachen Liu, Zhiwei Liu, Shaojun Lu, Min Shen, Guangpei Wang, Huan Wang, Zhi Wang, Zhaogui Xu, Jiawei Yang, Qing Ye, Gehao Zhang, Yu Zhang, Zelin Zhao, Xunjin Zheng, Hailian Zhou, Lifu Zhu, and Xianying Zhu. 
\"\u003ca href=\"https://arxiv.org/abs/2310.06266\" rel=\"nofollow\"\u003eCodeFuse-13B: A Pretrained Multi-lingual Code Large Language Model\u003c/a\u003e.\" In \u003cem\u003earXiv preprint arXiv:2310.06266\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eNikitha Rao, Kush Jain, Uri Alon, Claire Le Goues, and Vincent J Hellendoorn. \"\u003ca href=\"https://arxiv.org/abs/2310.01602\" rel=\"nofollow\"\u003eCAT-LM Training Language Models on Aligned Code And Tests\u003c/a\u003e.\" In \u003cem\u003e38th IEEE/ACM International Conference on Automated Software Engineering (ASE)\u003c/em\u003e, pp. 409-420. IEEE, 2023.\u003c/li\u003e\n\u003cli\u003ePratyush Patel, Esha Choukse, Chaojie Zhang, Íñigo Goiri, Brijesh Warrier, Nithish Mahalingam, Ricardo Bianchini. \"\u003ca href=\"https://arxiv.org/abs/2308.12908\" rel=\"nofollow\"\u003ePOLCA: Power Oversubscription in LLM Cloud Providers\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eJunqi Yin, Sajal Dash, John Gounley, Feiyi Wang, and Georgia Tourassi. \"\u003ca href=\"https://link.springer.com/article/10.1007/s11227-023-05479-7\" rel=\"nofollow\"\u003eEvaluation of pre-training large language models on leadership-class supercomputers\u003c/a\u003e.\" In \u003cem\u003ethe Journal of Supercomputing\u003c/em\u003e 79, no. 18, 2023.\u003c/li\u003e\n\u003cli\u003eTal Kadosh, Niranjan Hasabnis, Vy A. Vo, Nadav Schneider, Neva Krien, Mihai Capota, Abdul Wasay, Nesreen Ahmed, Ted Willke, Guy Tamir, Yuval Pinter, Timothy Mattson, and Gal Oren. \"\u003ca href=\"https://arxiv.org/abs/2312.13322\" rel=\"nofollow\"\u003eDomain-Specific Code Language Models: Unraveling the Potential for HPC Codes and Tasks\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eGuobin Shen, Dongcheng Zhao, Yiting Dong, Yang Li, Jindong Li, Kang Sun, and Yi Zeng. \"\u003ca href=\"https://arxiv.org/abs/2312.07625\" rel=\"nofollow\"\u003eAstrocyte-Enabled Advancements in Spiking Neural Networks for Large Language Modeling\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2023.\u003c/li\u003e\n\u003cli\u003eEghbal A. Hosseini, Martin A. Schrimpf, Yian Zhang, Samuel Bowman, Noga Zaslavsky, and Evelina Fedorenko. \"\u003ca href=\"https://www.biorxiv.org/content/10.1101/2022.10.04.510681\" rel=\"nofollow\"\u003eArtificial neural network language models align neurally and behaviorally with humans even after a developmentally realistic amount of training.\u003c/a\u003e\" In \u003cem\u003eNeurobiology of Language\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003cli\u003eXiongye Xiao, Chenyu Zhou, Heng Ping, Defu Cao, Yaxing Li, Yizhuo Zhou, Shixuan Li, and Paul Bogdan. \"\u003ca href=\"https://arxiv.org/abs/2402.09099\" rel=\"nofollow\"\u003eExploring Neuron Interactions and Emergence in LLMs: From the Multifractal Analysis Perspective\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003cli\u003eZhiyuan Zeng, Qipeng Guo, Zhaoye Fei, Zhangyue Yin, Yunhua Zhou, Linyang Li, Tianxiang Sun, Hang Yan, Dahua Lin, and Xipeng Qiu. 
\"\u003ca href=\"https://arxiv.org/abs/2402.12399\" rel=\"nofollow\"\u003eTurn Waste into Worth: Rectifying Top-k Router of MoE\u003c/a\u003e.\" In \u003cem\u003earXiv preprint\u003c/em\u003e, 2024.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eModels\u003c/h2\u003e\u003ca id=\"user-content-models\" class=\"anchor\" aria-label=\"Permalink: Models\" href=\"#models\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe following models were trained using this library:\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eEnglish LLMs\u003c/h3\u003e\u003ca id=\"user-content-english-llms\" class=\"anchor\" aria-label=\"Permalink: English LLMs\" href=\"#english-llms\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eEleutherAI's \u003ca href=\"https://huggingface.co/EleutherAI/gpt-neox-20b\" rel=\"nofollow\"\u003eGPT-NeoX-20B\u003c/a\u003e and \u003ca href=\"https://github.com/EleutherAI/pythia\"\u003ePythia (70M through 13B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eCarperAI's \u003ca href=\"https://huggingface.co/CarperAI/FIM-NeoX-1.3B\" rel=\"nofollow\"\u003eFIM-NeoX-1.3B\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eStabilityAI's \u003ca href=\"https://github.com/Stability-AI/StableLM\"\u003eStableLM (3B and 7B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eTogether.ai's \u003ca href=\"https://together.ai/blog/redpajama-models-v1\" rel=\"nofollow\"\u003eRedPajama-INCITE (3B and 7B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eCarnegie Mellon University's \u003ca href=\"https://huggingface.co/hoskinson-center/proofGPT-v0.1-6.7B\" rel=\"nofollow\"\u003eproofGPT (1.3B and 6.7B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eDampish's \u003ca href=\"https://huggingface.co/Dampish/StellarX-4B-V0.2\" rel=\"nofollow\"\u003eStellarX (2.8B and 4B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eChinese Academy of Sciences's \u003ca href=\"https://arxiv.org/abs/2312.07625\" 
rel=\"nofollow\"\u003eAstroSNN (1.5B)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eNon-English LLMs\u003c/h3\u003e\u003ca id=\"user-content-non-english-llms\" class=\"anchor\" aria-label=\"Permalink: Non-English LLMs\" href=\"#non-english-llms\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eEleutherAI's \u003ca href=\"https://github.com/EleutherAI/polyglot\"\u003ePolyglot-Ko (1.3B through 12.8B)\u003c/a\u003e (Korean)\u003c/li\u003e\n\u003cli\u003eKorea University's \u003ca href=\"https://github.com/nlpai-lab/KULLM\"\u003eKULLM-Polyglot (5.8B and 12.8B)\u003c/a\u003e (Korean)\u003c/li\u003e\n\u003cli\u003eStability AI's \u003ca href=\"https://huggingface.co/stabilityai/japanese-stablelm-base-alpha-7b\" rel=\"nofollow\"\u003eJapanese Stable LM (7B)\u003c/a\u003e (Japanese)\u003c/li\u003e\n\u003cli\u003eLearnItAnyway's \u003ca href=\"https://huggingface.co/LearnItAnyway/llava-polyglot-ko-1.3b-hf\" rel=\"nofollow\"\u003eLLaVA-Polyglot-Ko (1.3B)\u003c/a\u003e (Korean)\u003c/li\u003e\n\u003cli\u003eRinna Co.'s \u003ca href=\"https://huggingface.co/rinna/japanese-gpt-neox-3.6b\" rel=\"nofollow\"\u003ejapanese-gpt-neox-3.6b\u003c/a\u003e (Japanese) and \u003ca href=\"https://huggingface.co/rinna/bilingual-gpt-neox-4b\" rel=\"nofollow\"\u003ebilingual-gpt-neox-4b\u003c/a\u003e (English / Japanese)\u003c/li\u003e\n\u003cli\u003eCyberAgent's \u003ca href=\"https://huggingface.co/cyberagent/open-calm-7b\" rel=\"nofollow\"\u003eOpen-CLM (125M through 7B)\u003c/a\u003e (Japanese)\u003c/li\u003e\n\u003cli\u003eThe Hungarian Research Centre for Linguistics's \u003ca href=\"https://huggingface.co/NYTK/PULI-GPTrio\" rel=\"nofollow\"\u003ePULI GPTrio (6.7B)\u003c/a\u003e (Hungarian / English / Chinese)\u003c/li\u003e\n\u003cli\u003eThe University of Tokyo's \u003ca href=\"https://huggingface.co/Kojima777/weblab-10b\" rel=\"nofollow\"\u003eweblab-10b\u003c/a\u003e and \u003ca href=\"https://huggingface.co/Kojima777/weblab-10b-instruction-sft\" rel=\"nofollow\"\u003eweblab-10b-instruct\u003c/a\u003e (Japanese)\u003c/li\u003e\n\u003cli\u003enolando.ai's \u003ca href=\"https://blog.nolano.ai/Hi-NOLIN/\" rel=\"nofollow\"\u003eHi-NOLIN (9B)\u003c/a\u003e (English, Hindi)\u003c/li\u003e\n\u003cli\u003eRenmin University of China's \u003ca href=\"https://huggingface.co/yulan-team/YuLan-Base-12b\" rel=\"nofollow\"\u003eYuLan (12B)\u003c/a\u003e (English, Chinese)\u003c/li\u003e\n\u003cli\u003eThe Basque Center for Language Technology's \u003ca href=\"https://huggingface.co/HiTZ/latxa-70b-v1.2\" rel=\"nofollow\"\u003eLatixna (70B)\u003c/a\u003e (Basque)\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" 
class=\"heading-element\" dir=\"auto\"\u003eCode Models\u003c/h3\u003e\u003ca id=\"user-content-code-models\" class=\"anchor\" aria-label=\"Permalink: Code Models\" href=\"#code-models\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eCarnegie Mellon University's \u003ca href=\"https://github.com/VHellendoorn/Code-LMs\"\u003ePolyCoder (160M through 2.7B)\u003c/a\u003e and \u003ca href=\"https://huggingface.co/nikitharao/catlm\" rel=\"nofollow\"\u003eCAT-LM (2.7B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eStabilityAI's \u003ca href=\"https://stability.ai/blog/stablecode-llm-generative-ai-coding\" rel=\"nofollow\"\u003eStableCode (1.3B)\u003c/a\u003e and \u003ca href=\"https://stability.ai/blog/stablecode-llm-generative-ai-coding\" rel=\"nofollow\"\u003eStableCode-Completion-Alpha (3B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eCodeFuse AI's \u003ca href=\"https://huggingface.co/codefuse-ai/CodeFuse-13B\" rel=\"nofollow\"\u003eCodeFuse (13B)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAI for Science\u003c/h3\u003e\u003ca id=\"user-content-ai-for-science\" class=\"anchor\" aria-label=\"Permalink: AI for Science\" href=\"#ai-for-science\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eEleutherAI's \u003ca href=\"https://arxiv.org/abs/2310.10631\" rel=\"nofollow\"\u003eLLeMMA (34B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eOak Ridge National Lab's \u003ca href=\"https://github.com/at-aaims/forge\"\u003eFORGE (26B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eOak Ridge National Lab's \u003ca href=\"https://arxiv.org/abs/2402.00691\" rel=\"nofollow\"\u003eUnnamed Material Science Domain Models (7B)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003ePacific Northwest National Lab's \u003ca href=\"https://openreview.net/pdf?id=7UudBVsIrr\" rel=\"nofollow\"\u003eMolJet (undisclosed size)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" 
class=\"heading-element\" dir=\"auto\"\u003eOther Modalities\u003c/h3\u003e\u003ca id=\"user-content-other-modalities\" class=\"anchor\" aria-label=\"Permalink: Other Modalities\" href=\"#other-modalities\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eRinna Co.'s \u003ca href=\"https://arxiv.org/abs/2406.12428\" rel=\"nofollow\"\u003ePSLM (7B)\u003c/a\u003e (speech / text)\u003c/li\u003e\n\u003cli\u003eUniversity College London's \u003ca href=\"https://huggingface.co/Waterhorse/chessgpt-base-v1\" rel=\"nofollow\"\u003eChessGPT-3B\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eGretel's \u003ca href=\"https://huggingface.co/gretelai/text2table\" rel=\"nofollow\"\u003eText-to-Table (3B)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAdministrative Notes\u003c/h1\u003e\u003ca id=\"user-content-administrative-notes\" class=\"anchor\" aria-label=\"Permalink: Administrative Notes\" href=\"#administrative-notes\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCiting GPT-NeoX\u003c/h2\u003e\u003ca id=\"user-content-citing-gpt-neox\" class=\"anchor\" aria-label=\"Permalink: Citing GPT-NeoX\" href=\"#citing-gpt-neox\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIf you have found the GPT-NeoX library helpful in your work, you can cite this repository as\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-text-bibtex notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"@software{gpt-neox-library,\n title = {{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}},\n author = {Andonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Phang, Jason and Purohit, Shivanshu and Schoelkopf, Hailey and Stander, Dashiell and Songz, Tri and Tigges, Curt and Thérien, Benjamin and Wang, Phil and Weinbach, Samuel},\n url = {https://www.github.com/eleutherai/gpt-neox},\n doi = {10.5281/zenodo.5879544},\n month = {9},\n year = {2023},\n version = {2.0.0},\n}\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003e@software\u003c/span\u003e{\u003cspan class=\"pl-en\"\u003egpt-neox-library\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003etitle\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003e{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003eauthor\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003eAndonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Phang, Jason and Purohit, Shivanshu and Schoelkopf, Hailey and Stander, Dashiell and Songz, Tri and Tigges, Curt and Thérien, Benjamin and Wang, Phil and Weinbach, Samuel\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003eurl\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003ehttps://www.github.com/eleutherai/gpt-neox\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003edoi\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003e10.5281/zenodo.5879544\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003emonth\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003e9\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003eyear\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003e2023\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003eversion\u003c/span\u003e = \u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e{\u003c/span\u003e2.0.0\u003cspan class=\"pl-pds\"\u003e}\u003c/span\u003e\u003c/span\u003e,\n}\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo cite the 20 billion parameter model named \u003ccode\u003eGPT-NeoX-20B\u003c/code\u003e, please use\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-text-bibtex notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"@inproceedings{gpt-neox-20b,\n title={{GPT-NeoX-20B}: An 

```bibtex
@inproceedings{gpt-neox-20b,
  title={{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
  author={Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
  booktitle={Proceedings of the ACL Workshop on Challenges \& Perspectives in Creating Large Language Models},
  url={https://arxiv.org/abs/2204.06745},
  year={2022}
}
```

## Contributing

GPT-NeoX is built by the open-source AI community, and relies on our amazing contributors!
Please see our [contributing](CONTRIBUTING.md) guide for more details on our CLA, code formatting, testing, etc.

## Licensing

This repository hosts code that is part of EleutherAI's GPT-NeoX project. Copyright (c) 2024, EleutherAI. Licensed under the Apache License:

```
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

This repository is based on code written by NVIDIA that is licensed under the Apache License, Version 2.0. In accordance with the Apache License, all files that are modifications of code originally written by NVIDIA maintain an NVIDIA copyright header. All files that do not contain such a header are the exclusive copyright of EleutherAI. When the NVIDIA code has been modified from its original version, that fact is noted in the copyright header. All derivative works of this repository must preserve these headers under the terms of the Apache License.

This repository also contains code written by a number of other authors.
Such contributions are marked, and the relevant licensing is included where appropriate.

For full terms, see the `LICENSE` file. If you have any questions, comments, or concerns about licensing, please email us at [contact@eleuther.ai](mailto:contact@eleuther.ai).

## Acknowledgements

We run our experiments on a Kubernetes cluster provided by [CoreWeave](https://coreweave.com/) and a Slurm cluster provided by [Stability AI](https://stability.ai). We are thankful to the DeepSpeed team for their advice and consultation.
data-styled.g19[id="Truncate__StyledTruncate-sc-23o1d2-0"]{content:"liVpTx,"}/*!sc*/ </style> <!-- --> <!-- --> <div class="Box-sc-g0xbh4-0 iVEunk"><div class="Box-sc-g0xbh4-0 jzuOtQ"><div class="Box-sc-g0xbh4-0 bGojzy"></div></div><div class="Box-sc-g0xbh4-0 iNSVHo"><div class="Box-sc-g0xbh4-0 bVgnfw"><div class="Box-sc-g0xbh4-0 CEgMp"><button type="button" aria-haspopup="true" aria-expanded="false" tabindex="0" aria-label="main branch" data-testid="anchor-button" class="Box-sc-g0xbh4-0 gMOVLe prc-Button-ButtonBase-c50BI overview-ref-selector width-full" data-loading="false" data-size="medium" data-variant="default" aria-describedby="branch-picker-repos-header-ref-selector-loading-announcement" id="branch-picker-repos-header-ref-selector"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x"><div class="Box-sc-g0xbh4-0 bZBlpz"><div class="Box-sc-g0xbh4-0 lhTYNA"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></div><div class="Box-sc-g0xbh4-0 ffLUq ref-selector-button-text-container"><span class="Box-sc-g0xbh4-0 bmcJak prc-Text-Text-0ima0"> <!-- -->main</span></div></div></span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><button hidden="" data-hotkey-scope="read-only-cursor-text-area"></button></div><div class="Box-sc-g0xbh4-0 fLXEGX"><a style="--button-color:fg.muted" type="button" href="/EleutherAI/gpt-neox/branches" class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rclab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Branches</span></span></a><a style="--button-color:fg.muted" type="button" href="/EleutherAI/gpt-neox/tags" 
class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rklab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Tags</span></span></a></div><div class="Box-sc-g0xbh4-0 dqfxud"><a style="--button-color:fg.muted" type="button" aria-label="Go to Branches page" href="/EleutherAI/gpt-neox/branches" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Relab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></a><a style="--button-color:fg.muted" type="button" aria-label="Go to Tags page" href="/EleutherAI/gpt-neox/tags" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Rmlab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></a></div></div><div class="Box-sc-g0xbh4-0 jxTzTd"><div class="Box-sc-g0xbh4-0 gqqBXN"><div class="Box-sc-g0xbh4-0 dzXgxt"><!--$--><div class="Box-sc-g0xbh4-0 iWFGlI"><span class="Box-sc-g0xbh4-0 vcvyP TextInput-wrapper prc-components-TextInputWrapper-i1ofR prc-components-TextInputBaseWrapper-ueK9q" data-leading-visual="true" data-trailing-visual="true" aria-busy="false"><span class="TextInput-icon" id=":R2j5ab:" aria-hidden="true"><svg aria-hidden="true" focusable="false" class="octicon octicon-search" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path 
d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path></svg></span><input type="text" aria-label="Go to file" role="combobox" aria-controls="file-results-list" aria-expanded="false" aria-haspopup="dialog" autoCorrect="off" spellcheck="false" placeholder="Go to file" aria-describedby=":R2j5ab: :R2j5abH1:" data-component="input" class="prc-components-Input-Ic-y8" value=""/><span class="TextInput-icon" id=":R2j5abH1:" aria-hidden="true"></span></span></div><!--/$--></div><div class="Box-sc-g0xbh4-0 YUPas"><button type="button" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":Rr5ab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x">Go to file</span></span></button></div><div class="react-directory-add-file-icon"></div><div class="react-directory-remove-file-icon"></div></div><button type="button" aria-haspopup="true" aria-expanded="false" tabindex="0" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="primary" aria-describedby=":R55ab:-loading-announcement" id=":R55ab:"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-code hide-sm" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Code</span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><div class="Box-sc-g0xbh4-0 izFOf"><button data-component="IconButton" type="button" aria-label="Open more actions menu" aria-haspopup="true" aria-expanded="false" tabindex="0" class="prc-Button-ButtonBase-c50BI prc-Button-IconButton-szpyj" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":R75ab:-loading-announcement" id=":R75ab:"><svg aria-hidden="true" focusable="false" class="octicon octicon-kebab-horizontal" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path></svg></button></div></div></div><div class="Box-sc-g0xbh4-0 vIPPs"><div data-hpc="true"><button hidden="" 
data-testid="focus-next-element-button" data-hotkey="j"></button><button hidden="" data-testid="focus-previous-element-button" data-hotkey="k"></button><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading" id="folders-and-files">Folders and files</h2><table aria-labelledby="folders-and-files" class="Box-sc-g0xbh4-0 fdROMU"><thead class="Box-sc-g0xbh4-0 jGKpsv"><tr class="Box-sc-g0xbh4-0 jdgHnn"><th colSpan="2" class="Box-sc-g0xbh4-0 bQivRW"><span class="text-bold">Name</span></th><th colSpan="1" class="Box-sc-g0xbh4-0 ldkMIO"><span class="text-bold">Name</span></th><th class="hide-sm"><div title="Last commit message" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit message</span></div></th><th colSpan="1" class="Box-sc-g0xbh4-0 jMbWeI"><div title="Last commit date" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit date</span></div></th></tr></thead><tbody><tr class="Box-sc-g0xbh4-0 gpqjiB"><td colSpan="3" class="bgColor-muted p-1 rounded-top-2"><div class="Box-sc-g0xbh4-0 dzCJzi"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">Latest commit</h2><div style="width:120px" class="Skeleton Skeleton--text" data-testid="loading"> </div><div class="d-flex flex-shrink-0 gap-2"><div data-testid="latest-commit-details" class="d-none d-sm-flex flex-items-center"></div><div class="d-flex gap-2"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">History</h2><a href="/EleutherAI/gpt-neox/commits/main/" class="prc-Button-ButtonBase-c50BI d-none d-lg-flex LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":Raqj8pab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x"><span class="fgColor-default">2,317 Commits</span></span></span></a><div class="d-sm-none"></div><div class="d-flex d-lg-none"><span role="tooltip" aria-label="2,317 Commits" id="history-icon-button-tooltip" class="Tooltip__TooltipBase-sc-17tf59c-0 hWlpPn tooltipped-n"><a href="/EleutherAI/gpt-neox/commits/main/" class="prc-Button-ButtonBase-c50BI LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":R1iqj8pab:-loading-announcement history-icon-button-tooltip"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX 
prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span></span></a></span></div></div></div></div></td></tr><tr class="react-directory-row undefined" id="folder-row-0"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".github" aria-label=".github, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/.github">.github</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".github" aria-label=".github, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/.github">.github</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-1"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="configs" aria-label="configs, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/configs">configs</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon 
octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="configs" aria-label="configs, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/configs">configs</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-2"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="eval_tasks" aria-label="eval_tasks, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/eval_tasks">eval_tasks</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="eval_tasks" aria-label="eval_tasks, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/eval_tasks">eval_tasks</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-3"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="images" aria-label="images, (Directory)" class="Link--primary" 
href="/EleutherAI/gpt-neox/tree/main/images">images</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="images" aria-label="images, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/images">images</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-4"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="megatron" aria-label="megatron, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/megatron">megatron</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="megatron" aria-label="megatron, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/megatron">megatron</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-5"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div 
class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="post-training" aria-label="post-training, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/post-training">post-training</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="post-training" aria-label="post-training, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/post-training">post-training</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-6"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="requirements" aria-label="requirements, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/requirements">requirements</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="requirements" aria-label="requirements, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/requirements">requirements</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-7"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" 
overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tests" aria-label="tests, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/tests">tests</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tests" aria-label="tests, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/tests">tests</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-8"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tools" aria-label="tools, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/tools">tools</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="tools" aria-label="tools, (Directory)" class="Link--primary" href="/EleutherAI/gpt-neox/tree/main/tools">tools</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-9"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" 
class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".clang-format" aria-label=".clang-format, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.clang-format">.clang-format</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".clang-format" aria-label=".clang-format, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.clang-format">.clang-format</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-10"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".dockerignore" aria-label=".dockerignore, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.dockerignore">.dockerignore</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 
16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".dockerignore" aria-label=".dockerignore, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.dockerignore">.dockerignore</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-11"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitignore" aria-label=".gitignore, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.gitignore">.gitignore</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitignore" aria-label=".gitignore, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.gitignore">.gitignore</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-12"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 
1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".pre-commit-config.yaml" aria-label=".pre-commit-config.yaml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.pre-commit-config.yaml">.pre-commit-config.yaml</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".pre-commit-config.yaml" aria-label=".pre-commit-config.yaml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/.pre-commit-config.yaml">.pre-commit-config.yaml</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-13"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CITATION.cff" aria-label="CITATION.cff, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/CITATION.cff">CITATION.cff</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CITATION.cff" 
aria-label="CITATION.cff, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/CITATION.cff">CITATION.cff</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-14"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CONTRIBUTING.md" aria-label="CONTRIBUTING.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/CONTRIBUTING.md">CONTRIBUTING.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="CONTRIBUTING.md" aria-label="CONTRIBUTING.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/CONTRIBUTING.md">CONTRIBUTING.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-15"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="Dockerfile" aria-label="Dockerfile, (File)" class="Link--primary" 
href="/EleutherAI/gpt-neox/blob/main/Dockerfile">Dockerfile</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="Dockerfile" aria-label="Dockerfile, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/Dockerfile">Dockerfile</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-16"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE" aria-label="LICENSE, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/LICENSE">LICENSE</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE" aria-label="LICENSE, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/LICENSE">LICENSE</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-17"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div 
class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="MANIFEST.in" aria-label="MANIFEST.in, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/MANIFEST.in">MANIFEST.in</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="MANIFEST.in" aria-label="MANIFEST.in, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/MANIFEST.in">MANIFEST.in</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-18"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README-MUP.md" aria-label="README-MUP.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/README-MUP.md">README-MUP.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 
2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README-MUP.md" aria-label="README-MUP.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/README-MUP.md">README-MUP.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-19"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-20"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 
.138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="deepy.py" aria-label="deepy.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/deepy.py">deepy.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="deepy.py" aria-label="deepy.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/deepy.py">deepy.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-21"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="docker-compose-dockerhub.yml" aria-label="docker-compose-dockerhub.yml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/docker-compose-dockerhub.yml">docker-compose-dockerhub.yml</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a 
title="docker-compose-dockerhub.yml" aria-label="docker-compose-dockerhub.yml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/docker-compose-dockerhub.yml">docker-compose-dockerhub.yml</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-22"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="docker-compose.yml" aria-label="docker-compose.yml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/docker-compose.yml">docker-compose.yml</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="docker-compose.yml" aria-label="docker-compose.yml, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/docker-compose.yml">docker-compose.yml</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-23"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a 
title="eval.py" aria-label="eval.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/eval.py">eval.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="eval.py" aria-label="eval.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/eval.py">eval.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-24"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="generate.py" aria-label="generate.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/generate.py">generate.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="generate.py" aria-label="generate.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/generate.py">generate.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" 
id="folder-row-25"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="prepare_data.py" aria-label="prepare_data.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/prepare_data.py">prepare_data.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="prepare_data.py" aria-label="prepare_data.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/prepare_data.py">prepare_data.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-26"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="train.py" aria-label="train.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/train.py">train.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="train.py" aria-label="train.py, (File)" class="Link--primary" href="/EleutherAI/gpt-neox/blob/main/train.py">train.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="Box-sc-g0xbh4-0 eNCcrz show-for-mobile" data-testid="view-all-files-row"><td colSpan="3" class="Box-sc-g0xbh4-0 bHTcCe"><div><button class="prc-Link-Link-85e08">View all files</button></div></td></tr></tbody></table></div><div class="Box-sc-g0xbh4-0 csrIcr"><div class="Box-sc-g0xbh4-0 bUQNHB"><div itemscope="" itemType="https://schema.org/abstract" class="Box-sc-g0xbh4-0 jPdcfu"><h2 class="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0 brGdpi">Repository files navigation</h2><nav class="Box-sc-g0xbh4-0 iphEWz prc-components-UnderlineWrapper-oOh5J" aria-label="Repository files"><ul class="prc-components-UnderlineItemList-b23Hf" role="list"><li class="Box-sc-g0xbh4-0 hUCRAk"><a class="prc-components-UnderlineItem-lJsg-" href="#" aria-current="page"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-book" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path></svg></span><span data-component="text" data-content="README">README</span></a></li><li class="Box-sc-g0xbh4-0 hUCRAk"><a class="prc-components-UnderlineItem-lJsg-" href="#"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-law" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 
[![GitHub issues](https://img.shields.io/github/issues/EleutherAI/gpt-neox)](https://github.com/EleutherAI/gpt-neox/issues) [![Weights & Biases monitoring](https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg)](https://wandb.ai/eleutherai/neox)

# GPT-NeoX

This repository records [EleutherAI](https://www.eleuther.ai)'s library for training large-scale language models on GPUs. Our current framework is based on NVIDIA's [Megatron Language Model](https://github.com/NVIDIA/Megatron-LM) and has been augmented with techniques from [DeepSpeed](https://www.deepspeed.ai) as well as some novel optimizations. We aim to make this repo a centralized and accessible place to gather techniques for training large-scale autoregressive language models, and to accelerate research into large-scale training. This library is in widespread use in [academic, industry, and government labs](https://github.com/EleutherAI/gpt-neox#adoption-and-publications), including by researchers at Oak Ridge National Lab, CarperAI, Stability AI, Together.ai, Korea University, Carnegie Mellon University, and the University of Tokyo, among others. Uniquely among similar libraries, GPT-NeoX supports a wide variety of systems and hardware, including launching via Slurm, MPI, and the IBM Job Step Manager, and has been run at scale on [AWS](https://aws.amazon.com/), [CoreWeave](https://www.coreweave.com/), [ORNL Summit](https://www.olcf.ornl.gov/summit/), [ORNL Frontier](https://www.olcf.ornl.gov/frontier/), [LUMI](https://www.lumi-supercomputer.eu/), and others.

**If you are not looking to train models with billions of parameters from scratch, this is likely the wrong library to use. For generic inference needs, we recommend the Hugging Face `transformers` library instead, which supports GPT-NeoX models.**

## Why GPT-NeoX?

GPT-NeoX leverages many of the same features and technologies as the popular Megatron-DeepSpeed library, but with substantially increased usability and novel optimizations. Major features include:

- Distributed training with ZeRO and 3D parallelism
- Support for a wide variety of systems and hardware, including launching via Slurm, MPI, and the IBM Job Step Manager; the library has been run at scale on [AWS](https://aws.amazon.com/), [CoreWeave](https://www.coreweave.com/), Oak Ridge's [Summit](https://www.olcf.ornl.gov/summit/) and [Frontier](https://www.olcf.ornl.gov/frontier/), [Pacific Northwest National Laboratory](https://hpc.pnl.gov/index.shtml), Argonne's [Polaris](https://docs.alcf.anl.gov/polaris/data-science-workflows/applications/gpt-neox/), [LUMI](https://www.lumi-supercomputer.eu/), and more
- Cutting-edge architectural innovations, including rotary and ALiBi positional embeddings, parallel feedforward attention layers, and flash attention
- Predefined configurations for popular architectures including Pythia, PaLM, Falcon, and LLaMA 1 & 2
- Curriculum learning
- Easy connections with the open-source ecosystem, including Hugging Face's [tokenizers](https://github.com/huggingface/tokenizers) and [transformers](https://github.com/huggingface/transformers/) libraries, experiment monitoring via [WandB](https://wandb.ai/site)/[Comet](https://www.comet.com/site/)/TensorBoard, and evaluation via our [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)

## News

**[10/9/2024]** We now support [Transformer Engine](https://github.com/NVIDIA/TransformerEngine) integration

**[9/9/2024]** We now support preference learning via [DPO](https://arxiv.org/abs/2305.18290), [KTO](https://arxiv.org/abs/2402.01306), and reward modeling

**[9/9/2024]** We now support integration with [Comet ML](https://www.comet.com/site/), a machine learning monitoring platform

**[5/21/2024]** We now support [RWKV](https://www.rwkv.com/) with pipeline parallelism! See the PRs for [RWKV](https://github.com/EleutherAI/gpt-neox/pull/1198) and [RWKV+pipeline](https://github.com/EleutherAI/gpt-neox/pull/1221)

**[3/21/2024]** We now support Mixture-of-Experts (MoE)

**[3/17/2024]** We now support AMD MI250X GPUs

**[3/15/2024]** We now support [Mamba](https://github.com/state-spaces/mamba) with tensor parallelism! See [the PR](https://github.com/EleutherAI/gpt-neox/pull/1184)

**[8/10/2023]** We now support checkpointing with AWS S3! Activate with the `s3_path` config option (for more detail, see [the PR](https://github.com/EleutherAI/gpt-neox/pull/1010))

**[9/20/2023]** As of [#1035](https://github.com/EleutherAI/gpt-neox/pull/1035), we have deprecated Flash Attention 0.x and 1.x and migrated support to Flash Attention 2.x. We don't believe this will cause problems, but if you have a specific use case that requires old flash support with the latest GPT-NeoX, please raise an issue.

**[8/10/2023]** We have experimental support for LLaMA 2 and Flash Attention v2 in our [math-lm](https://github.com/EleutherAI/math-lm) project that will be upstreamed later this month.

**[5/17/2023]** After fixing some miscellaneous bugs, we now fully support bf16.

**[4/11/2023]** We have upgraded our Flash Attention implementation to support ALiBi positional embeddings.

**[3/9/2023]** We have released GPT-NeoX 2.0.0, an upgraded version built on the latest DeepSpeed, which will be regularly synced with upstream going forward.

## Versions

Prior to 3/9/2023, GPT-NeoX relied on [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed), which was based on an old version of DeepSpeed (0.3.15). In order to migrate to the latest upstream DeepSpeed version while allowing users to access the old versions of GPT-NeoX and DeeperSpeed, we have introduced two versioned releases for both libraries:

- Version 2.0 of [GPT-NeoX](https://github.com/EleutherAI/gpt-neox/releases/tag/v2.0) and [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed/releases/tag/v2.0) are the latest versions built on the latest DeepSpeed, and will be maintained going forward.
- Version 1.0 of [GPT-NeoX](https://github.com/EleutherAI/gpt-neox/releases/tag/v1.0) and [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed/releases/tag/v1.0) maintain snapshots of the old stable versions that [GPT-NeoX-20B](https://arxiv.org/abs/2204.06745) and the [Pythia Suite](https://github.com/EleutherAI/pythia) were trained on.

# Contents

- [GPT-NeoX](#gpt-neox)
  - [Why GPT-NeoX?](#why-gpt-neox)
  - [News](#news)
  - [Versions](#versions)
- [Contents](#contents)
- [Quick Start](#quick-start)
  - [Environment and Dependencies](#environment-and-dependencies)
    - [Host Setup](#host-setup)
    - [Flash Attention](#flash-attention)
    - [Transformer Engine](#transformer-engine)
    - [Multi-Node Launching](#multi-node-launching)
    - [Containerized Setup](#containerized-setup)
  - [Usage](#usage)
- [Configuration](#configuration)
  - [Mixture of Experts](#mixture-of-experts)
- [Datasets](#datasets)
  - [Preconfigured Datasets](#preconfigured-datasets)
  - [Using Custom Data](#using-custom-data)
- [Training and Finetuning](#training-and-finetuning)
  - [Pretrained Models](#pretrained-models)
    - [GPT-NeoX-20B](#gpt-neox-20b)
    - [Pythia](#pythia)
    - [Polyglot](#polyglot)
- [Inference](#inference)
- [Evaluation](#evaluation)
- [Exporting to Hugging Face](#exporting-to-hugging-face)
- [Monitoring](#monitoring)
  - [Weights and Biases](#weights-and-biases)
  - [TensorBoard](#tensorboard)
- [Running on multi-node](#running-on-multi-node)
- [Profiling](#profiling)
- [Adoption and Publications](#adoption-and-publications)
  - [Publications](#publications)
  - [Models](#models)
    - [English LLMs](#english-llms)
    - [Non-English LLMs](#non-english-llms)
    - [Code Models](#code-models)
    - [Other Modalities](#other-modalities)
- [Administrative Notes](#administrative-notes)
  - [Citing GPT-NeoX](#citing-gpt-neox)
  - [Contributing](#contributing)
  - [Licensing](#licensing)
  - [Acknowledgements](#acknowledgements)

# Quick Start

## Environment and Dependencies

### Host Setup
This codebase has been primarily developed and tested for Python 3.8-3.10 and PyTorch 1.8-2.0. This is not a strict requirement, and other versions and combinations of libraries may work.

To install the remaining basic dependencies, run:

```bash
pip install -r requirements/requirements.txt
pip install -r requirements/requirements-wandb.txt # optional, if logging using WandB
pip install -r requirements/requirements-tensorboard.txt # optional, if logging via tensorboard
pip install -r requirements/requirements-comet.txt # optional, if logging via Comet
```

from the repository root.

> [!WARNING]
> Our codebase relies on [DeeperSpeed](https://github.com/EleutherAI/DeeperSpeed), our fork of the [DeepSpeed](https://github.com/microsoft/DeepSpeed) library with some added changes. We strongly recommend using Anaconda, a virtual machine, or some other form of environment isolation before continuing. Failure to do so may cause other repositories that rely on DeepSpeed to break.

### Fused Kernels

We now support AMD GPUs (MI100, MI250X) through JIT fused-kernel compilation. Fused kernels will be built and loaded as needed.
To avoid waiting during job launching, you can also pre-build the kernels manually from a Python session:

```python
from megatron.fused_kernels import load

load()
```

This will automatically adapt the build process to the GPU vendor (AMD or NVIDIA) without platform-specific code changes. To further test fused kernels using `pytest`, run `pytest tests/model/test_fused_kernels.py`.

### Flash Attention

To use [Flash-Attention](https://github.com/HazyResearch/flash-attention), install the additional dependencies in `./requirements/requirements-flashattention.txt`, or use a PyTorch NGC container with it pre-installed (note that functionality is not guaranteed when using versions different from our requirements file). Then set the attention type in your configuration accordingly (see [configs](configs)).
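As a minimal sketch, assuming the `attention_config` option and a 24-layer model (check the files under `configs/` for the authoritative option name and per-layer syntax):

```json
{
  "attention_config": [[["flash"], 24]]
}
```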
Using flash attention can provide significant speed-ups over regular attention on certain GPU architectures, including Ampere GPUs (such as A100s); see the repository for more details.

### Transformer Engine

To use [Transformer Engine (TE)](https://github.com/NVIDIA/TransformerEngine), install the additional dependencies in `./requirements/requirements-transformer-engine.txt`, or use a PyTorch NGC container with it pre-installed (note that functionality is not guaranteed when using versions different from our requirements file). See [this config](https://github.com/EleutherAI/gpt-neox/blob/main/configs/1-3B-transformer-engine.yml) for an example of using TE on a 1.3B model. This can provide significant speed-ups over regular attention on certain GPU architectures, including Ampere and Hopper GPUs; see the repository for more details.

TE provides very efficient kernels for both A100 and H100 GPUs. We've run some sample ablations on both A100 and H100.

### Multi-Node Launching

NeoX and Deep(er)Speed support training on multiple different nodes, and you have the option of using a variety of different launchers to orchestrate multi-node jobs.

In general, there needs to be a "hostfile" somewhere accessible with the format:

```bash
node1_ip slots=8
node2_ip slots=8
```

where the first column contains the IP address of each node in your setup and the number of slots is the number of GPUs that node has access to. In your config you must pass in the path to the hostfile with `"hostfile": "/path/to/hostfile"`.
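For example, a minimal config fragment (the path is a placeholder):

```json
{
  "hostfile": "/path/to/hostfile"
}
```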
Alternatively, the path to the hostfile can be provided via the `DLTS_HOSTFILE` environment variable.

#### pdsh

`pdsh` is the default launcher, and if you're using `pdsh` then all you must do (besides ensuring that pdsh is installed in your environment) is set `{"launcher": "pdsh"}` in your config files.

#### MPI

If using MPI, you must specify the MPI library (DeepSpeed/GPT-NeoX currently supports `mvapich`, `openmpi`, `mpich`, and `impi`, though `openmpi` is the most commonly used and tested) as well as pass the `deepspeed_mpi` flag in your config file:

```json
{
  "launcher": "openmpi",
  "deepspeed_mpi": true
}
```

With your environment properly set up and the correct configuration files, you can use `deepy.py` like a normal Python script and start (for example) a training job with:

`python3 deepy.py train.py /path/to/configs/my_model.yml`

#### Slurm
Using Slurm can be slightly more involved. As with MPI, you must add the following to your config:

```json
{
  "launcher": "slurm",
  "deepspeed_slurm": true
}
```

If you do not have ssh access to the compute nodes in your Slurm cluster, you also need to add `{"no_ssh_check": true}` to your config.
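Putting these together, a config fragment for a Slurm cluster without ssh access to the compute nodes might look like the following sketch:

```json
{
  "launcher": "slurm",
  "deepspeed_slurm": true,
  "no_ssh_check": true
}
```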
<a href="https://docs.olcf.ornl.gov/systems/summit_user_guide.html#job-launcher-jsrun" rel="nofollow">JSRun</a>, <a href="https://computing.llnl.gov/projects/flux-building-framework-resource-management" rel="nofollow">Flux</a>, etc)</h5><a id="user-content-1-adding-a-launcher-eg-jsrun-flux-etc" class="anchor" aria-label="Permalink: 1. Adding a Launcher (e.g. JSRun, Flux, etc)" href="#1-adding-a-launcher-eg-jsrun-flux-etc"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">In this case, you must add a new multinode runner class to <code>deepspeed/launcher/multinode_runner.py</code> and expose it as a configuration option in GPT-NeoX. Examples on how we did this for <a href="https://docs.olcf.ornl.gov/systems/summit_user_guide.html#job-launcher-jsrun" rel="nofollow">Summit JSRun</a> are in <a href="https://github.com/EleutherAI/DeeperSpeed/commit/9aed6c8500d7c492d85c5c88687322dbda70e370">this DeeperSpeed commit</a> and <a href="https://github.com/EleutherAI/gpt-neox/commit/3782c7ae60f8624e566e3879b89bb09e8b59b869">this GPT-NeoX commit</a>, respectively.</p> <div class="markdown-heading" dir="auto"><h5 tabindex="-1" class="heading-element" dir="auto">2. Modifying Run Command or Environment Variables</h5><a id="user-content-2-modifying-run-command-or-environment-variables" class="anchor" aria-label="Permalink: 2. Modifying Run Command or Environment Variables" href="#2-modifying-run-command-or-environment-variables"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We have encountered many cases where we wish to modify the MPI/Slurm run command for an optimization or to debug (e.g. to modify the <a href="https://slurm.schedmd.com/srun.html#OPT_cpu-bind" rel="nofollow">Slurm srun CPU binding</a> or to tag MPI logs with the rank). In this case, you must modify the multinode runner class' run command under its <code>get_cmd</code> method (e.g. <a href="https://github.com/microsoft/DeepSpeed/blob/17957728c0362bf8ae70feca308e491e55ef9feb/deepspeed/launcher/multinode_runner.py#L135-L147">mpirun_cmd</a> for OpenMPI). 
Examples of how we did this to provide optimized and rank-tagged run commands using Slurm and OpenMPI for the Stability cluster are in [this DeeperSpeed branch](https://github.com/microsoft/DeepSpeed/compare/master...EleutherAI:DeeperSpeed:v2.0-stability).

#### Hostfile Generation

In general you will not be able to have a single fixed hostfile, so you need a script to generate one dynamically when your job starts. An example script to dynamically generate a hostfile using [Slurm](https://slurm.schedmd.com/documentation.html) and 8 GPUs per node is:

```bash
#!/bin/bash
GPUS_PER_NODE=8
mkdir -p /sample/path/to/hostfiles
# need to add the current slurm jobid to hostfile name so that we don't add to previous hostfile
hostfile=/sample/path/to/hostfiles/hosts_$SLURM_JOBID

# be extra sure we aren't appending to a previous hostfile
rm $hostfile &> /dev/null

# loop over the node names
for i in `scontrol show hostnames $SLURM_NODELIST`
do
    # add a line to the hostfile
    echo $i slots=$GPUS_PER_NODE >>$hostfile
done
```

`$SLURM_JOBID` and `$SLURM_NODELIST` are environment variables that Slurm creates for you. See the [sbatch documentation](https://slurm.schedmd.com/sbatch.html#SECTION_OUTPUT-ENVIRONMENT-VARIABLES) for a full list of available Slurm environment variables set at job creation time.

#### Job Launching

Then you can create an [sbatch](https://slurm.schedmd.com/sbatch.html) script from which to kick off your GPT-NeoX job. A bare-bones sbatch script on a Slurm-based cluster with 8 GPUs per node would look like this:

```bash
#!/bin/bash
#SBATCH --job-name="neox"
#SBATCH --partition=your-partition
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8

# Some potentially useful distributed environment variables
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=12802
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`

class="pl-s"><span class="pl-pds">`</span>scontrol show hostnames <span class="pl-s"><span class="pl-pds">"</span><span class="pl-smi">$SLURM_JOB_NODELIST</span><span class="pl-pds">"</span></span> <span class="pl-k">|</span> wc -l<span class="pl-pds">`</span></span> <span class="pl-c"><span class="pl-c">#</span> Your hostfile creation script from above</span> ./write_hostfile.sh <span class="pl-c"><span class="pl-c">#</span> Tell DeepSpeed where to find our generated hostfile via DLTS_HOSTFILE</span> <span class="pl-k">export</span> DLTS_HOSTFILE=/sample/path/to/hostfiles/hosts_<span class="pl-smi">$SLURM_JOBID</span> <span class="pl-c"><span class="pl-c">#</span> Launch training</span> python3 deepy.py train.py /sample/path/to/your/configs/my_model.yml </pre></div> <p dir="auto">You can then kick off a training run with <code>sbatch my_sbatch_script.sh</code></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Containerized Setup</h3><a id="user-content-containerized-setup" class="anchor" aria-label="Permalink: Containerized Setup" href="#containerized-setup"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We also provide a Dockerfile and docker-compose configuration if you prefer to run NeoX in a container.</p> <p dir="auto">Requirements to run the container are to have appropriate GPU drivers, an up-to-date installation of Docker, and <a href="https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html" rel="nofollow">nvidia-container-toolkit</a> installed. 
To test whether your installation is working, you can use their "sample workload", which is:

```
docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
```

Provided that runs, you need to export `NEOX_DATA_PATH` and `NEOX_CHECKPOINT_PATH` in your environment to specify your data directory and the directory for storing and loading checkpoints:

```
export NEOX_DATA_PATH=/mnt/sda/data/enwik8 # or wherever your data is stored on your system
export NEOX_CHECKPOINT_PATH=/mnt/sda/checkpoints
```

And then, from the gpt-neox directory, you can build the image and run a shell in a container with

```
docker compose run gpt-neox bash
```

After the build, you should be able to do this:

```
mchorse@537851ed67de:~$ echo $(pwd)
/home/mchorse
mchorse@537851ed67de:~$ ls -al
total 48
drwxr-xr-x  1 mchorse mchorse 4096 Jan  8 05:33 .
drwxr-xr-x  1 root    root    4096 Jan  8 04:09 ..
-rw-r--r--  1 mchorse mchorse  220 Feb 25  2020 .bash_logout
-rw-r--r--  1 mchorse mchorse 3972 Jan  8 04:09 .bashrc
drwxr-xr-x  4 mchorse mchorse 4096 Jan  8 05:35 .cache
drwx------  3 mchorse mchorse 4096 Jan  8 05:33 .nv
-rw-r--r--  1 mchorse mchorse  807 Feb 25  2020 .profile
drwxr-xr-x  2 root    root    4096 Jan  8 04:09 .ssh
drwxrwxr-x  8 mchorse mchorse 4096 Jan  8 05:35 chk
drwxrwxrwx  6 root    root    4096 Jan  7 17:02 data
drwxr-xr-x 11 mchorse mchorse 4096 Jan  8 03:52 gpt-neox
```

For a long-running job, you should run

```
docker compose up -d
```

to run the container in detached mode, and then, in a separate terminal session, run

```
docker compose exec gpt-neox bash
```

You can then run any job you want from inside the container.

Concerns when running for a long time or in detached mode include:

- You will have to terminate the container manually when you are no longer using it.
- If you want processes to continue running when your shell session ends, you will need to background them.
- If you want logging, you will have to make sure to pipe logs to disk, and set up wandb and/or Comet logging.

If you prefer to run the prebuilt container image from Docker Hub, you can run the docker compose commands with `-f docker-compose-dockerhub.yml` instead, e.g.,

```
docker compose -f docker-compose-dockerhub.yml run gpt-neox bash
```

## Usage

All functionality should be launched using `deepy.py`, a wrapper around the `deepspeed` launcher.

We currently offer three main functions:

1. `train.py` is used for training and finetuning models.
2. `eval.py` is used to evaluate a trained model using the [language model evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness).
3. `generate.py` is used to sample text from a trained model.

These can be launched with:

```bash
./deepy.py [script.py] [./path/to/config_1.yml] [./path/to/config_2.yml] ... [./path/to/config_n.yml]
```

For example, to launch training you can run

```bash
./deepy.py train.py ./configs/20B.yml ./configs/local_cluster.yml
```

For more details on each entry point, see the [Training and Finetuning](#training-and-finetuning), [Inference](#inference), and [Evaluation](#evaluation) sections, respectively.

# Configuration

GPT-NeoX parameters are defined in a YAML configuration file which is passed to the `deepy.py` launcher. We have provided some example .yml files in [configs](/EleutherAI/gpt-neox/blob/main/configs), showing a diverse array of features and model sizes.

These files are generally complete, but non-optimal. For example, depending on your specific GPU configuration, you may need to change some settings such as `pipe-parallel-size` and `model-parallel-size` to increase or decrease the degree of parallelisation, `train_micro_batch_size_per_gpu` or `gradient-accumulation-steps` to modify batch-size-related settings, or the `zero_optimization` dict to modify how optimizer states are parallelised across workers.

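As a purely illustrative sketch (the key names come from the paragraph above; the values are placeholders rather than recommendations, and the full `zero_optimization` dict follows DeepSpeed's ZeRO documentation), such overrides sit in your .yml config alongside the rest of the model definition:

```yaml
  # Placeholder values -- tune these for your own hardware.
  "pipe-parallel-size": 1,
  "model-parallel-size": 2,
  "train_micro_batch_size_per_gpu": 4,
  "gradient-accumulation-steps": 8,
  "zero_optimization": {
    "stage": 1
  },
```
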
For a more detailed guide to the features available and how to configure them, see [the configuration README](/EleutherAI/gpt-neox/blob/main/configs/README.md), and for documentation of every possible argument, see [configs/neox_arguments.md](/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md).

## Mixture of Experts

GPT-NeoX includes multiple expert implementations for MoE. To select between them, specify `moe_type` of `megablocks` (default) or `deepspeed`.

Both are based on the DeepSpeed MoE parallelism framework, which supports tensor-expert-data parallelism. Both allow you to toggle between token dropping and dropless (the default, and what Megablocks was designed for). Sinkhorn routing is coming soon!

For an example of a basic complete configuration, see configs/125M-dmoe.yml (for Megablocks dropless) or configs/125M-moe.yml.

Most MoE-related configuration arguments are prefixed with `moe`. Some common configuration parameters and their defaults are as follows:

```
moe_type: megablocks
moe_num_experts: 1 # 1 disables MoE. 8 is a reasonable value.
moe_loss_coeff: 0.1
expert_interval: 2 # See details below
enable_expert_tensor_parallelism: false # See details below
moe_expert_parallel_size: 1 # See details below
moe_token_dropping: false
```
DeepSpeed can be further configured with the following:

```
moe_top_k: 1
moe_min_capacity: 4
moe_train_capacity_factor: 1.0 # Setting to 1.0
moe_eval_capacity_factor: 1.0 # Setting to 1.0
```

One MoE layer is present every `expert_interval` transformer layers including the first, so with 12 layers total:

```
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
```

Experts would be in these layers:

```
0, 2, 4, 6, 8, 10
```

By default, we use expert-data parallelism, so any available tensor parallelism (`model_parallel_size`) will be used for expert routing. For instance, given the following:

```
expert_parallel_size: 4
model_parallel_size: 2 # aka tensor parallelism
```

With 32 GPUs, the behavior will look like:

- In non-expert layers:
  - Tensor parallelism is 2. (There are 32 / 2 = 16 such tensor parallel groups, each of size 2.)
  - Data parallelism implicitly becomes 32 / 2 = 16.
- In expert layers:
  - There is no tensor parallelism.
  - Expert parallelism is 4. (There are 32 / 4 = 8 expert parallel groups, each of size 4.)
  - Data parallelism implicitly becomes 32 / 4 = 8. Some cross-node token routing happens as a result of this redivision of data parallelism between 16 and 8. To avoid it, ensure that `expert_parallel_size == model_parallel_size`.

Setting `enable_expert_tensor_parallelism` enables tensor-expert-data (TED) parallelism. The way to interpret the above would then be:

- In non-expert layers: same as before.
- In expert layers:
  - Tensor parallelism is 2. (There are 32 / 2 = 16 tensor parallel groups, each of size 2.)
  - Expert parallelism is 4. (There are 32 / 4 = 8 expert parallel groups, each of size 4.)
  - Data parallelism implicitly becomes 32 / (2 * 4) = 4. Again, cross-node token routing happens. To avoid it, ensure `expert_parallel_size == 1` or `model_parallel_size == 1`.

So note that DP must be divisible by (MP * EP). For more details, see the [TED paper](https://arxiv.org/abs/2303.06318).

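To make the arithmetic above concrete, here is a sketch of how those settings might appear together for the 32-GPU TED example (illustrative values only, written in the same plain key style as the MoE snippets above):

```yaml
# Illustrative values, mirroring the 32-GPU worked example above.
moe_num_experts: 8
expert_parallel_size: 4                 # EP = 4
model_parallel_size: 2                  # MP (tensor parallelism) = 2
enable_expert_tensor_parallelism: true
# In expert layers, data parallelism becomes 32 / (2 * 4) = 4,
# so DP must stay divisible by MP * EP.
```
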
Pipeline parallelism is not yet supported - coming soon!

# Datasets

## Preconfigured Datasets

Several preconfigured datasets are available, including most components from [the Pile](https://arxiv.org/abs/2101.00027), as well as the Pile train set itself, for straightforward tokenization using the `prepare_data.py` entry point.

E.g., to download and tokenize the enwik8 dataset with the GPT2 Tokenizer, saving it to `./data`, you can run:

```
python prepare_data.py -d ./data
```

or a single shard of the Pile (`pile_subset`) with the GPT-NeoX-20B tokenizer (assuming you have it saved at `./20B_checkpoints/20B_tokenizer.json`):

```
python prepare_data.py -d ./data -t HFTokenizer --vocab-file ./20B_checkpoints/20B_tokenizer.json pile_subset
```

The tokenized data will be saved out to two files: `[data-dir]/[dataset-name]/[dataset-name]_text_document.bin` and `[data-dir]/[dataset-name]/[dataset-name]_text_document.idx`.
You will need to add the prefix that both these files share to your training configuration file under the `data-path` field. E.g.:

```yaml
  "data-path": "./data/enwik8/enwik8_text_document",
```

## Using Custom Data

To prepare your own dataset for training with custom data, format it as one large [jsonl](https://jsonlines.org/)-formatted file with each item in the list of dictionaries being a separate document. The document text should be grouped under one JSON key, i.e. `"text"`. Any auxiliary data stored in other fields will not be used.

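For illustration, a single line of such a jsonl file might look like the following sketch (the `"source"` field is a made-up example of auxiliary data that would simply be ignored):

```
{"text": "This is the full text of the first document.", "source": "example"}
```
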
Next make sure to download the GPT2 tokenizer vocab and merge files from the following links:

- Vocab: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json
- Merge: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt

Or use the 20B tokenizer (for which only a single Vocab file is needed):

- Vocab: https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/20B_tokenizer.json

(alternatively, you can provide any tokenizer file that can be loaded by Hugging Face's tokenizers library with the `Tokenizer.from_pretrained()` command)

You can now pretokenize your data using `tools/datasets/preprocess_data.py`, the arguments for which are detailed below:

```
usage: preprocess_data.py [-h] --input INPUT [--jsonl-keys JSONL_KEYS [JSONL_KEYS ...]] [--num-docs NUM_DOCS]
                          --tokenizer-type {HFGPT2Tokenizer,HFTokenizer,GPT2BPETokenizer,CharLevelTokenizer}
                          [--vocab-file VOCAB_FILE] [--merge-file MERGE_FILE] [--append-eod] [--ftfy]
                          --output-prefix OUTPUT_PREFIX [--dataset-impl {lazy,cached,mmap}]
                          [--workers WORKERS] [--log-interval LOG_INTERVAL]

optional arguments:
  -h, --help            show this help message and exit

input data:
  --input INPUT         Path to input jsonl files or lmd archive(s) - if using multiple archives, put them in a comma separated list
  --jsonl-keys JSONL_KEYS [JSONL_KEYS ...]
                        space separate listed of keys to extract from jsonl. Default: text
  --num-docs NUM_DOCS   Optional: Number of documents in the input data (if known) for an accurate progress bar.

tokenizer:
  --tokenizer-type {HFGPT2Tokenizer,HFTokenizer,GPT2BPETokenizer,CharLevelTokenizer}
                        What type of tokenizer to use.
  --vocab-file VOCAB_FILE
                        Path to the vocab file
  --merge-file MERGE_FILE
                        Path to the BPE merge file (if necessary).
  --append-eod          Append an <eod> token to the end of a document.
  --ftfy                Use ftfy to clean text

output data:
  --output-prefix OUTPUT_PREFIX
                        Path to binary output file without suffix
  --dataset-impl {lazy,cached,mmap}
                        Dataset implementation to use. Default: mmap

runtime:
  --workers WORKERS     Number of worker processes to launch
  --log-interval LOG_INTERVAL
                        Interval between progress updates
```
For example:

```bash
python tools/datasets/preprocess_data.py \
            --input ./data/mydataset.jsonl.zst \
            --output-prefix ./data/mydataset \
            --vocab ./data/gpt2-vocab.json \
            --merge-file gpt2-merges.txt \
            --dataset-impl mmap \
            --tokenizer-type GPT2BPETokenizer \
            --append-eod
```

You would then run training with the following settings added to your configuration file:

```yaml
  "data-path": "data/mydataset_text_document",
```

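If you are using the GPT2 tokenizer files downloaded above, your config will also need to point at them. As a sketch, these settings commonly sit alongside the `data-path` shown above (the paths are placeholders, and the authoritative list of tokenizer-related arguments is in configs/neox_arguments.md):

```yaml
  "vocab-file": "data/gpt2-vocab.json",
  "merge-file": "data/gpt2-merges.txt",
  "tokenizer-type": "GPT2BPETokenizer",
```
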
# Training and Finetuning

Training is launched using `deepy.py`, a wrapper around DeepSpeed's launcher, which launches the same script in parallel across many GPUs / nodes.

The general usage pattern is:

```bash
python ./deepy.py train.py [path/to/config1.yml] [path/to/config2.yml] ...
```

You can pass in an arbitrary number of configs which will all be merged at runtime.

You can also optionally pass in a config prefix, which will assume all your configs are in the same folder and append that prefix to their path.

For example:

```bash
python ./deepy.py train.py -d configs 125M.yml local_setup.yml
```

This will deploy the `train.py` script on all nodes with one process per GPU. The worker nodes and number of GPUs are specified in the `/job/hostfile` file (see [parameter documentation](/EleutherAI/gpt-neox/blob/main/configs/README.md)), or can simply be passed in as the `num_gpus` arg if running on a single node setup.

Although this is not strictly necessary, we find it useful to define the model parameters in one config file (e.g. `configs/125M.yml`) and the data path parameters in another (e.g. `configs/local_setup.yml`).

## Pretrained Models

### GPT-NeoX-20B

GPT-NeoX-20B is a 20 billion parameter autoregressive language model trained on [the Pile](https://arxiv.org/abs/2101.00027). Technical details about GPT-NeoX-20B can be found in [the associated paper](https://arxiv.org/abs/2204.06745).
The configuration file for this model is both available at [`./configs/20B.yml`](/EleutherAI/gpt-neox/blob/main/configs/20B.yml) and included in the download links below.

[Slim weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/) - (No optimizer states, for inference or finetuning, 39GB)

To download from the command line to a folder named `20B_checkpoints`, use the following command:

```bash
wget --cut-dirs=5 -nH -r --no-parent --reject "index.html*" https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights/ -P 20B_checkpoints
```

[Full weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights/) - (Including optimizer states, 268GB)

To download from the command line to a folder named `20B_checkpoints`, use the following command:

```bash
wget --cut-dirs=5 -nH -r --no-parent --reject "index.html*" https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights/ -P 20B_checkpoints
```

Weights can alternatively be downloaded using a BitTorrent client. Torrent files can be downloaded here: [slim weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/slim_weights.torrent), [full weights](https://the-eye.eu/public/AI/models/GPT-NeoX-20B/full_weights.torrent).

We additionally have 150 checkpoints saved throughout training, one every 1,000 steps.
We are working on figuring out how to best serve these at scale, but in the meantime people interested in working with the partially trained checkpoints can email us at contact@eleuther.ai to arrange access.

### Pythia

The Pythia Scaling Suite is a suite of models ranging from 70M parameters to 12B parameters trained on [the Pile](https://pile.eleuther.ai), intended to promote research on interpretability and training dynamics of large language models. Further details about the project and links to the models can be found in [the paper](https://arxiv.org/abs/2304.01373) and [on the project's GitHub](https://github.com/EleutherAI/pythia).

### Polyglot

The Polyglot Project is an effort to train powerful non-English pretrained language models to promote the accessibility of this technology to researchers outside the dominant powerhouses of machine learning. EleutherAI has trained and released 1.3B, 3.8B, and 5.8B parameter Korean language models, the largest of which outperforms all other publicly available language models on Korean language tasks.
Further details about the project and links to the models can be found [here](https://github.com/EleutherAI/polyglot).

# Inference

**For most uses we recommend deploying models trained using the GPT-NeoX library via the Hugging Face Transformers library, which is better optimized for inference.**

We support three types of generation from a pretrained model:

1. Unconditional generation
2. Conditional generation based on an input read from a file
3. Interactive generation, which allows for multiple rounds of back-and-forth between a user and the language model via a command line interface

All three types of text generation can be launched via `python ./deepy.py generate.py -d configs 125M.yml local_setup.yml text_generation.yml` with the appropriate values set in `configs/text_generation.yml`.

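As a rough sketch of the kind of settings that live in `configs/text_generation.yml` (the key names below are assumptions based on the generation arguments documented in configs/neox_arguments.md, so treat the shipped config and that file as authoritative), choosing between the three modes and basic sampling options might look like:

```yaml
  # Assumed key names -- check configs/text_generation.yml and configs/neox_arguments.md.
  "text-gen-type": "unconditional",          # or "input-file" / "interactive"
  "maximum_tokens": 102,
  "temperature": 1.0,
  "top_p": 0.0,
  "top_k": 0,
  "sample-input-file": "sample_input.txt",   # used for input-file generation
  "sample-output-file": "sample_output.txt",
```
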
# Evaluation

GPT-NeoX supports evaluation on downstream tasks through the [language model evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness).

To evaluate a trained model on the evaluation harness, simply run:

```bash
python ./deepy.py eval.py -d configs your_configs.yml --eval_tasks task1 task2 ... taskn
```

where `--eval_tasks` is a space-separated list of evaluation tasks, e.g. `--eval_tasks lambada hellaswag piqa sciq`. For details of all tasks available, refer to the [lm-evaluation-harness repo](https://github.com/EleutherAI/lm-evaluation-harness).

# Exporting to Hugging Face

GPT-NeoX is optimized heavily for training only, and GPT-NeoX model checkpoints are not compatible out of the box with other deep learning libraries. To make models easily loadable and shareable with end users, and for further exporting to various other frameworks, GPT-NeoX supports checkpoint conversion to the [Hugging Face Transformers](https://arxiv.org/abs/1910.03771) format.

Though NeoX supports a number of different architectural configurations, including ALiBi positional embeddings, not all of these configurations map cleanly onto the supported configurations within Hugging Face Transformers.

NeoX supports export of compatible models into the following architectures:

- GPTNeoXForCausalLM
- LlamaForCausalLM
- MistralForCausalLM

Training a model which does not fit into one of these Hugging Face Transformers architectures cleanly will require writing custom modeling code for the exported model.

To convert a GPT-NeoX library checkpoint to Hugging Face-loadable format, run:

```bash
python ./tools/ckpts/convert_neox_to_hf.py --input_dir /path/to/model/global_stepXXX --config_file your_config.yml --output_dir hf_model/save/location --precision {auto,fp16,bf16,fp32} --architecture {neox,mistral,llama}
```

Then to upload a model to [the Hugging Face Hub](https://huggingface.co/), run:

```bash
huggingface-cli login
python ./tools/ckpts/upload.py
```

and input the requested information, including your HF Hub user token.

href="#importing-models-into-gpt-neox"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">NeoX supplies several utilities for converting a pretrained model checkpoint into a format that can be trained within the library.</p> <p dir="auto">The following models or model families can be loaded in GPT-NeoX:</p> <ul dir="auto"> <li>Llama 1</li> <li>Llama 2</li> <li>CodeLlama</li> <li>Mistral-7b-v0.1</li> </ul> <p dir="auto">We provide two utilities for converting from two different checkpoint formats into a format compatible with GPT-NeoX.</p> <p dir="auto">To convert a Llama 1 or Llama 2 checkpoint distributed by Meta AI from its original file format (downloadable <a href="https://github.com/facebookresearch/llama">here</a> or <a href="https://huggingface.co/meta-llama/Llama-2-7b" rel="nofollow">here</a>) into the GPT-NeoX library, run</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="python tools/ckpts/convert_raw_llama_weights_to_neox.py --input_dir /path/to/model/parent/dir/7B --model_size 7B --output_dir /path/to/save/ckpt --num_output_shards &lt;TENSOR_PARALLEL_SIZE&gt; (--pipeline_parallel if pipeline-parallel-size &gt;= 1)"><pre class="notranslate"><code>python tools/ckpts/convert_raw_llama_weights_to_neox.py --input_dir /path/to/model/parent/dir/7B --model_size 7B --output_dir /path/to/save/ckpt --num_output_shards &lt;TENSOR_PARALLEL_SIZE&gt; (--pipeline_parallel if pipeline-parallel-size &gt;= 1) </code></pre></div> <p dir="auto">To convert from a Hugging Face model into a NeoX-loadable, run <code>tools/ckpts/convert_hf_to_sequential.py</code>. 
# Monitoring

In addition to storing logs locally, we provide built-in support for three popular experiment monitoring frameworks: [Weights & Biases](https://wandb.ai/site), [TensorBoard](https://www.tensorflow.org/tensorboard/), and [Comet](https://www.comet.com/site).

## Weights and Biases

[Weights & Biases](https://wandb.ai/eleutherai/neox) is a machine learning monitoring platform. To use wandb to monitor your gpt-neox experiments:

1. Create an account at https://wandb.ai/site to generate your API key.
2. Log into Weights & Biases on your machine by executing `wandb login`; your runs will automatically be recorded.
3. Dependencies required for wandb monitoring can be found in and installed from `./requirements/requirements-wandb.txt`. An example config is provided in `./configs/local_setup_wandb.yml`.
4. There are two optional fields associated with Weights & Biases: `wandb_group` allows you to name the run group and `wandb_team` allows you to assign your runs to an organization or team account, as sketched below.

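As a sketch, those optional fields might be combined in your config like this (the `use_wandb` key is an assumption drawn from the example config referenced above; `wandb_group` and `wandb_team` are the fields named in step 4, and the values are placeholders):

```yaml
  "use_wandb": true,
  "wandb_group": "my-experiment-group",
  "wandb_team": "my-team",
```
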
An example config is provided in <code>./configs/local_setup_wandb.yml</code>.</li> </ol> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">TensorBoard</h2><a id="user-content-tensorboard" class="anchor" aria-label="Permalink: TensorBoard" href="#tensorboard"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We support using TensorBoard via the <code><var>tensorboard-dir</var></code> field. Dependencies required for TensorBoard monitoring can be found in and installed from <code>./requirements/requirements-tensorboard.txt</code>.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Comet</h2><a id="user-content-comet" class="anchor" aria-label="Permalink: Comet" href="#comet"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><a href="https://www.comet.com/site" rel="nofollow">Comet</a> is a machine learning monitoring platform. To use comet to monitor your gpt-neox experiments:</p> <ol dir="auto"> <li>Create an account at <a href="https://www.comet.com/login" rel="nofollow">https://www.comet.com/login</a> to generate your API key.</li> <li>Once generated, link your API key at runtime by running <code>comet login</code> or passing <code>export COMET_API_KEY=&lt;your-key-here&gt;</code></li> <li>Install <code>comet_ml</code> and any dependency libraries via <code>pip install -r requirements/requirements-comet.txt</code></li> <li>Enable Comet with <code>use_comet: True</code>. You can also customize where data is being logged with <code>comet_workspace</code> and <code>comet_project</code>. 
A full example config with comet enabled is provided in <code>configs/local_setup_comet.yml</code>.</li> <li>Run your experiment, and monitor metrics in the Comet workspace that you passed!</li> </ol> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Running on multi-node</h1><a id="user-content-running-on-multi-node" class="anchor" aria-label="Permalink: Running on multi-node" href="#running-on-multi-node"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">If you need to supply a hostfile for use with the MPI-based DeepSpeed launcher, you can set the environment variable <code>DLTS_HOSTFILE</code> to point to the hostfile.</p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Profiling</h1><a id="user-content-profiling" class="anchor" aria-label="Permalink: Profiling" href="#profiling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We support profiling with Nsight Systems, the PyTorch Profiler, and PyTorch Memory Profiling.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Nsight Systems Profiling</h2><a id="user-content-nsight-systems-profiling" class="anchor" aria-label="Permalink: Nsight Systems Profiling" href="#nsight-systems-profiling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To use the Nsight Systems profiling, set config options <code>profile</code>, <code>profile_step_start</code>, and <code>profile_step_stop</code> (see <a href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md">here</a> for argument usage, and <a 
href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml">here</a> for a sample config).</p> <p dir="auto">To populate nsys metrics, launch training with:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="nsys profile -s none -t nvtx,cuda -o &lt;path/to/profiling/output&gt; --force-overwrite true \ --capture-range=cudaProfilerApi --capture-range-end=stop python $TRAIN_PATH/deepy.py \ $TRAIN_PATH/train.py --conf_dir configs &lt;config files&gt;"><pre class="notranslate"><code>nsys profile -s none -t nvtx,cuda -o &lt;path/to/profiling/output&gt; --force-overwrite true \ --capture-range=cudaProfilerApi --capture-range-end=stop python $TRAIN_PATH/deepy.py \ $TRAIN_PATH/train.py --conf_dir configs &lt;config files&gt; </code></pre></div> <p dir="auto">The generated output file can then by viewed with the Nsight Systems GUI:</p> <p dir="auto"><a target="_blank" rel="noopener noreferrer" href="/EleutherAI/gpt-neox/blob/main/images/nsight_profiling.png"><img src="/EleutherAI/gpt-neox/raw/main/images/nsight_profiling.png" alt="nsight-prof" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">PyTorch Profiling</h2><a id="user-content-pytorch-profiling" class="anchor" aria-label="Permalink: PyTorch Profiling" href="#pytorch-profiling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To use the built-in PyTorch profiler, set config options <code>profile</code>, <code>profile_step_start</code>, and <code>profile_step_stop</code> (see <a href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md">here</a> for argument usage, and <a href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml">here</a> for a sample config).</p> <p dir="auto">The PyTorch profiler will save traces to your <code>tensorboard</code> log directory. 
You can view these traces within TensorBoard by following the steps <a href="https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html" rel="nofollow">here</a>.</p> <p dir="auto"><a target="_blank" rel="noopener noreferrer" href="/EleutherAI/gpt-neox/blob/main/images/pytorch_profiling.png"><img src="/EleutherAI/gpt-neox/raw/main/images/pytorch_profiling.png" alt="torch-prof" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">PyTorch Memory Profiling</h2><a id="user-content-pytorch-memory-profiling" class="anchor" aria-label="Permalink: PyTorch Memory Profiling" href="#pytorch-memory-profiling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To use PyTorch Memory Profiling, set config options <code>memory_profiling</code> and <code>memory_profiling_path</code> (see <a href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/neox_arguments.md">here</a> for argument usage, and <a href="https://github.com/EleutherAI/gpt-neox/blob/main/configs/prof.yml">here</a> for a sample config).</p> <p dir="auto"><a target="_blank" rel="noopener noreferrer" href="/EleutherAI/gpt-neox/blob/main/images/memory_profiling.png"><img src="/EleutherAI/gpt-neox/raw/main/images/memory_profiling.png" alt="mem-prof" style="max-width: 100%;"></a></p> <p dir="auto">View the generated profile with the <a href="https://github.com/pytorch/pytorch/blob/main/torch/cuda/_memory_viz.py">memory_viz.py</a> script. 
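<p dir="auto">The script also ships inside PyTorch itself (as <code>torch/cuda/_memory_viz.py</code>), so one way to get a standalone copy is to download it from the repository linked above, for example:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="# Download _memory_viz.py from the PyTorch repository (raw counterpart of the file linked above)
wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/cuda/_memory_viz.py"><pre class="notranslate"><code># Download _memory_viz.py from the PyTorch repository (raw counterpart of the file linked above)
wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/cuda/_memory_viz.py
</code></pre></div>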
Run with:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="python _memory_viz.py trace_plot &lt;generated_profile&gt; -o trace.html"><pre class="notranslate"><code>python _memory_viz.py trace_plot &lt;generated_profile&gt; -o trace.html </code></pre></div> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Adoption and Publications</h1><a id="user-content-adoption-and-publications" class="anchor" aria-label="Permalink: Adoption and Publications" href="#adoption-and-publications"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The GPT-NeoX library has been widely adopted by academic and industry researchers and ported to many HPC systems.</p> <p dir="auto">If you have found this library useful in your research, please reach out and let us know! We would love to add you to our lists.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Publications</h2><a id="user-content-publications" class="anchor" aria-label="Permalink: Publications" href="#publications"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">EleutherAI and our collaborators have used it in the following publications:</p> <ul dir="auto"> <li><strong>Sid Black</strong>, <strong>Stella Biderman</strong>, <strong>Eric Hallahan</strong>, <strong>Quentin Anthony</strong>, <strong>Leo Gao</strong>, <strong>Laurence Golding</strong>, <strong>Horace He</strong>, <strong>Connor Leahy</strong>, <strong>Kyle McDonell</strong>, <strong>Jason Phang</strong>, <strong>Michael Pieler</strong>, <strong>Shivanshu Purohit</strong>, <strong>Laria Reynolds</strong>, <strong>Jon Tow</strong>, <strong>Ben Wang</strong>, and <strong>Samuel Weinbach</strong>. "<a href="https://arxiv.org/abs/2204.06745" rel="nofollow">GPT-NeoX-20B: An Open-Source Autoregressive Language Model</a>." 
In <em>Proceedings of the ACL Workshop on Challenges &amp; Perspectives in Creating Large Language Models</em>, 2022.</li> <li><strong>Stella Biderman</strong>, <strong>Hailey Schoelkopf</strong>, <strong>Quentin Anthony</strong>, <strong>Herbie Bradley</strong>, <strong>Kyle O'Brien</strong>, <strong>Eric Hallahan</strong>, <strong>Mohammad Aflah Khan</strong>, <strong>Shivanshu Purohit</strong>, <strong>USVSN Sai Prashanth</strong>, Edward Raff, <strong>Aviya Skowron</strong>, <strong>Lintang Sutawika</strong>, <strong>Oskar van der Wal</strong>. "<a href="https://arxiv.org/abs/2304.01373" rel="nofollow">Pythia: A suite for analyzing large language models across training and scaling</a>." In <em>International Conference on Machine Learning</em>, pp. 2397-2430. <em>PMLR</em>, 2023.</li> <li>Zhangir Azerbayev, Bartosz Piotrowski, <strong>Hailey Schoelkopf</strong>, Edward W. Ayers, Dragomir Radev, and Jeremy Avigad. "<a href="https://arxiv.org/abs/2302.12433" rel="nofollow">Proofnet: Autoformalizing and formally proving undergraduate-level mathematics</a>. <em>arXiv preprint arXiv:2302.12433</em>, 2023.</li> <li><strong>Stella Biderman</strong>, <strong>USVSN Sai Prashanth</strong>, <strong>Lintang Sutawika</strong>, <strong>Hailey Schoelkopf</strong>, <strong>Quentin Anthony</strong>, <strong>Shivanshu Purohit</strong>, and Edward Raff. "<a href="https://arxiv.org/abs/2304.11158" rel="nofollow">Emergent and predictable memorization in large language models.</a>" In <em>Neural Information Processing Systems</em>, 2023.</li> <li><strong>Hyunwoong Ko</strong>, <strong>Kichang Yang</strong>, <strong>Minho Ryu</strong>, <strong>Taekyoon Choi</strong>, <strong>Seungmu Yang,</strong> and Sungho Park. "<a href="https://arxiv.org/abs/2306.02254" rel="nofollow">A Technical Report for Polyglot-Ko: Open-Source Large-Scale Korean Language Models</a>." <em>arXiv preprint arXiv:2306.02254</em>, 2023.</li> <li>Kshitij Gupta, Benjamin Thérien, Adam Ibrahim, Mats Leon Richter, <strong>Quentin Anthony</strong>, Eugene Belilovsky, Irina Rish, and Timothée Lesort. "<a href="https://arxiv.org/abs/2308.04014" rel="nofollow">Continual Pre-Training of Large Language Models: How to re-warm your model?</a>" In <em>Workshop on Efficient Systems for Foundation Models @ ICML</em>, 2023.</li> <li><strong>Zhangir Azerbayev</strong>, <strong>Hailey Schoelkopf</strong>, Keiran Paster, Marco Dos Santos, Stephen McAleer, Albert Q Jiang, Jia Deng, <strong>Stella Biderman</strong>, and Sean Welleck. "Llemma: An open language model for mathematics" In <em>Math-AI Workshop @ NeurIPS</em>, 2023.</li> <li>Alexander Havrilla, Maksym Zhuravinskyi, Duy Phung, Aman Tiwari, Jonathan Tow, <strong>Stella Biderman</strong>, <strong>Quentin Anthony</strong>, and <strong>Louis Castricato</strong>. "<a href="https://aclanthology.org/2023.emnlp-main.530/" rel="nofollow">trlX: A Framework for Large Scale Reinforcement Learning from Human Feedback</a>." In <em>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</em>, 2023.</li> <li><strong>Quentin Anthony</strong>, <strong>Jacob Hatef</strong>, Deepak Narayanan, <strong>Stella Biderman</strong>, Stas Bekman, Junqi Yin, Aamir Shafi, Hari Subramoni, and Dhabaleswar Panda. "<a href="https://arxiv.org/abs/2401.14489" rel="nofollow">The Case for Co-Designing Model Architectures with Hardware</a>." In <em>arXiv preprint</em>, 2024.</li> <li>Adam Ibrahim, Benjamin Thérien, Kshitij Gupta, Mats L. 
Richter, <strong>Quentin Anthony</strong>, Timothée Lesort, Eugene Belilovsky, Irina Rish. "<a href="https://arxiv.org/abs/2403.08763" rel="nofollow">Simple and Scalable Strategies to Continually Pre-train Large Language Models</a>." In <em>arXiv preprint</em>, 2024.</li> <li>Junqi Yin, Avishek Bose, Guojing Cong, Isaac Lyngaas, <strong>Quentin Anthony</strong>. "<a href="https://arxiv.org/abs/2402.00691" rel="nofollow">Comparative Study of Large Language Model Architectures on Frontier</a>." In <em>arXiv preprint</em>, 2024.</li> </ul> <p dir="auto">The following publications by other research groups use this library:</p> <ul dir="auto"> <li>Ta-Chung Chi, Ting-Han Fan, Peter J. Ramadge, and Alexander Rudnicky. "<a href="https://arxiv.org/abs/2205.09921" rel="nofollow">KERPLE: Kernelized Relative Positional Embedding for Length Extrapolation</a>." In <em>Advances in Neural Information Processing Systems</em> 35, 2022.</li> <li>Sameera Horawalavithana, Ellyn Ayton, Shivam Sharma, Scott Howland, Megha Subramanian, Scott Vasquez, Robin Cosbey, Maria Glenski, and Svitlana Volkova. "<a href="https://aclanthology.org/2022.bigscience-1.12/" rel="nofollow">Foundation Models of Scientific Knowledge for Chemistry: Opportunities, Challenges and Lessons Learned</a>." In <em>Proceedings of the ACL Workshop on Challenges &amp; Perspectives in Creating Large Language Models</em>, 2022.</li> <li>Sophia Kolak, Ruben Martins, Claire Le Goues, and Vincent J. Hellendoorn. "<a href="https://par.nsf.gov/biblio/10340618" rel="nofollow">Patch Generation with Language Models: Feasibility and Scaling Behavior</a>"." In <em>Proceedings of the Deep Learning for Code Workshop at ICLR</em>, 2022.</li> <li>Frank F. Xu, Uri Alon, Graham Neubig, and Vincent J. Hellendoorn. "<a href="https://arxiv.org/abs/2202.13169" rel="nofollow">A Systematic Evaluation of Large Language Models of Code</a>." In <em>Proceedings of the ICLR Workshop on Deep Learning For Code</em>, 2022.</li> <li>Byung-Doh Oh and William Schuler. "<a href="https://arxiv.org/abs/2304.11389" rel="nofollow">Transformer-Based LM Surprisal Predicts Human Reading Times Best with About Two Billion Training Tokens</a>." In <em>Findings of the Association for Computational Linguistics</em>, 2023.</li> <li>Ta-Chung Chi, Ting-Han Fan, Alexander Rudnicky, and Peter Ramadge. "<a href="https://aclanthology.org/2023.acl-long.756/" rel="nofollow">Dissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis</a>." In <em>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</em>, pp. 13522-13537, 2023.</li> <li>Ta-Chung Chi, Ting-Han Fan, Li-Wei Chen, Alexander Rudnicky, and Peter Ramadge. "<a href="https://aclanthology.org/2023.acl-short.102/" rel="nofollow">Latent Positional Information is in the Self-Attention Variance of Transformer Language Models Without Positional Embeddings</a>." In <em>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</em>, pp. 13522-13537, 2023.</li> <li>Xidong Feng, Yicheng Luo, Ziyan Wang, Hongrui Tang, Mengyue Yang, Kun Shao, David Mguni, Yali Du, and Jun Wang. "<a href="https://arxiv.org/abs/2306.09200" rel="nofollow">ChessGPT: Bridging Policy Learning and Language Modeling.</a>" <em>arXiv preprint arXiv:2306.09200</em>, 2023.</li> <li>Orion Walker Dollar, Sameera Horawalavithana, Scott Vasquez, W. James Pfaendtner, and Svitlana Volkova. 
"<a href="https://openreview.net/pdf?id=7UudBVsIrr" rel="nofollow">MolJET: Multimodal Joint Embedding Transformer for Conditional de novo Molecular Design and Multi-Property Optimization.</a>" <em>preprint under review</em>, 2023.</li> <li>Jean Kaddour and Qi Liu. "<a href="https://arxiv.org/abs/2310.01119" rel="nofollow">Text Data Augmentation in Low-Resource Settings via Fine-Tuning of Large Language Models</a>." <em>arXiv:2310.01119</em>, 2023.</li> <li>Alon Albalak, Liangming Pan, Colin Raffel, and William Yang Wang. "<a href="https://arxiv.org/abs/2312.02406" rel="nofollow">Efficient Online Data Mixing For Language Model Pre-Training</a>." In <em>NeurIPS Workshop on R0-FoMo: Robustness of Few-shot and Zero-shot Learning in Large Foundation Models</em>, 2023.</li> <li>Eghbal A. Hosseini and Evelina Fedorenko. "<a href="https://www.biorxiv.org/content/10.1101/2023.11.05.564832v1" rel="nofollow">Large language models implicitly learn to straighten neural sentence trajectories to construct a predictive representation of natural language</a>." In <em>Neural Information Processing Systems</em>, 2023.</li> <li>Junqi Yin, Sajal Dash, Feiyi Wang, and Mallikarjun Shankar. "<a href="https://dl.acm.org/doi/abs/10.1145/3581784.3613215" rel="nofollow">FORGE: Pre-Training Open Foundation Models for Science</a>. In <em>Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis</em>, 1-13, 2023.</li> <li>Jean Kaddour and Qi Liu. "<a href="https://arxiv.org/abs/2310.01119" rel="nofollow">Text Data Augmentation in Low-Resource Settings via Fine-Tuning of Large Language Models</a>." In <em>arXiv preprint arXiv:2310.01119</em>, 2023.</li> <li>Peng Di, Jianguo Li, Hang Yu, Wei Jiang, Wenting Cai, Yang Cao, Chaoyu Chen, Dajun Chen, Hongwei Chen, Liang Chen, Gang Fan, Jie Gong, Zi Gong, Wen Hu, Tingting Guo, Zhichao Lei, Ting Li, Zheng Li, Ming Liang, Cong Liao, Bingchang Liu, Jiachen Liu, Zhiwei Liu, Shaojun Lu, Min Shen, Guangpei Wang, Huan Wang, Zhi Wang, Zhaogui Xu, Jiawei Yang, Qing Ye, Gehao Zhang, Yu Zhang, Zelin Zhao, Xunjin Zheng, Hailian Zhou, Lifu Zhu, and Xianying Zhu. "<a href="https://arxiv.org/abs/2310.06266" rel="nofollow">CodeFuse-13B: A Pretrained Multi-lingual Code Large Language Model</a>." In <em>arXiv preprint arXiv:2310.06266</em>, 2023.</li> <li>Nikitha Rao, Kush Jain, Uri Alon, Claire Le Goues, and Vincent J Hellendoorn. "<a href="https://arxiv.org/abs/2310.01602" rel="nofollow">CAT-LM Training Language Models on Aligned Code And Tests</a>." In <em>38th IEEE/ACM International Conference on Automated Software Engineering (ASE)</em>, pp. 409-420. IEEE, 2023.</li> <li>Pratyush Patel, Esha Choukse, Chaojie Zhang, Íñigo Goiri, Brijesh Warrier, Nithish Mahalingam, Ricardo Bianchini. "<a href="https://arxiv.org/abs/2308.12908" rel="nofollow">POLCA: Power Oversubscription in LLM Cloud Providers</a>." In <em>arXiv preprint</em>, 2023.</li> <li>Junqi Yin, Sajal Dash, John Gounley, Feiyi Wang, and Georgia Tourassi. "<a href="https://link.springer.com/article/10.1007/s11227-023-05479-7" rel="nofollow">Evaluation of pre-training large language models on leadership-class supercomputers</a>." In <em>the Journal of Supercomputing</em> 79, no. 18, 2023.</li> <li>Tal Kadosh, Niranjan Hasabnis, Vy A. Vo, Nadav Schneider, Neva Krien, Mihai Capota, Abdul Wasay, Nesreen Ahmed, Ted Willke, Guy Tamir, Yuval Pinter, Timothy Mattson, and Gal Oren. 
"<a href="https://arxiv.org/abs/2312.13322" rel="nofollow">Domain-Specific Code Language Models: Unraveling the Potential for HPC Codes and Tasks</a>." In <em>arXiv preprint</em>, 2023.</li> <li>Guobin Shen, Dongcheng Zhao, Yiting Dong, Yang Li, Jindong Li, Kang Sun, and Yi Zeng. "<a href="https://arxiv.org/abs/2312.07625" rel="nofollow">Astrocyte-Enabled Advancements in Spiking Neural Networks for Large Language Modeling</a>." In <em>arXiv preprint</em>, 2023.</li> <li>Eghbal A. Hosseini, Martin A. Schrimpf, Yian Zhang, Samuel Bowman, Noga Zaslavsky, and Evelina Fedorenko. "<a href="https://www.biorxiv.org/content/10.1101/2022.10.04.510681" rel="nofollow">Artificial neural network language models align neurally and behaviorally with humans even after a developmentally realistic amount of training.</a>" In <em>Neurobiology of Language</em>, 2024.</li> <li>Xiongye Xiao, Chenyu Zhou, Heng Ping, Defu Cao, Yaxing Li, Yizhuo Zhou, Shixuan Li, and Paul Bogdan. "<a href="https://arxiv.org/abs/2402.09099" rel="nofollow">Exploring Neuron Interactions and Emergence in LLMs: From the Multifractal Analysis Perspective</a>." In <em>arXiv preprint</em>, 2024.</li> <li>Zhiyuan Zeng, Qipeng Guo, Zhaoye Fei, Zhangyue Yin, Yunhua Zhou, Linyang Li, Tianxiang Sun, Hang Yan, Dahua Lin, and Xipeng Qiu. "<a href="https://arxiv.org/abs/2402.12399" rel="nofollow">Turn Waste into Worth: Rectifying Top-k Router of MoE</a>." In <em>arXiv preprint</em>, 2024.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Models</h2><a id="user-content-models" class="anchor" aria-label="Permalink: Models" href="#models"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The following models were trained using this library:</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">English LLMs</h3><a id="user-content-english-llms" class="anchor" aria-label="Permalink: English LLMs" href="#english-llms"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>EleutherAI's <a href="https://huggingface.co/EleutherAI/gpt-neox-20b" rel="nofollow">GPT-NeoX-20B</a> and <a href="https://github.com/EleutherAI/pythia">Pythia (70M through 13B)</a></li> <li>CarperAI's <a 
href="https://huggingface.co/CarperAI/FIM-NeoX-1.3B" rel="nofollow">FIM-NeoX-1.3B</a></li> <li>StabilityAI's <a href="https://github.com/Stability-AI/StableLM">StableLM (3B and 7B)</a></li> <li>Together.ai's <a href="https://together.ai/blog/redpajama-models-v1" rel="nofollow">RedPajama-INCITE (3B and 7B)</a></li> <li>Carnegie Mellon University's <a href="https://huggingface.co/hoskinson-center/proofGPT-v0.1-6.7B" rel="nofollow">proofGPT (1.3B and 6.7B)</a></li> <li>Dampish's <a href="https://huggingface.co/Dampish/StellarX-4B-V0.2" rel="nofollow">StellarX (2.8B and 4B)</a></li> <li>Chinese Academy of Sciences's <a href="https://arxiv.org/abs/2312.07625" rel="nofollow">AstroSNN (1.5B)</a></li> </ul> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Non-English LLMs</h3><a id="user-content-non-english-llms" class="anchor" aria-label="Permalink: Non-English LLMs" href="#non-english-llms"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>EleutherAI's <a href="https://github.com/EleutherAI/polyglot">Polyglot-Ko (1.3B through 12.8B)</a> (Korean)</li> <li>Korea University's <a href="https://github.com/nlpai-lab/KULLM">KULLM-Polyglot (5.8B and 12.8B)</a> (Korean)</li> <li>Stability AI's <a href="https://huggingface.co/stabilityai/japanese-stablelm-base-alpha-7b" rel="nofollow">Japanese Stable LM (7B)</a> (Japanese)</li> <li>LearnItAnyway's <a href="https://huggingface.co/LearnItAnyway/llava-polyglot-ko-1.3b-hf" rel="nofollow">LLaVA-Polyglot-Ko (1.3B)</a> (Korean)</li> <li>Rinna Co.'s <a href="https://huggingface.co/rinna/japanese-gpt-neox-3.6b" rel="nofollow">japanese-gpt-neox-3.6b</a> (Japanese) and <a href="https://huggingface.co/rinna/bilingual-gpt-neox-4b" rel="nofollow">bilingual-gpt-neox-4b</a> (English / Japanese)</li> <li>CyberAgent's <a href="https://huggingface.co/cyberagent/open-calm-7b" rel="nofollow">Open-CLM (125M through 7B)</a> (Japanese)</li> <li>The Hungarian Research Centre for Linguistics's <a href="https://huggingface.co/NYTK/PULI-GPTrio" rel="nofollow">PULI GPTrio (6.7B)</a> (Hungarian / English / Chinese)</li> <li>The University of Tokyo's <a href="https://huggingface.co/Kojima777/weblab-10b" rel="nofollow">weblab-10b</a> and <a href="https://huggingface.co/Kojima777/weblab-10b-instruction-sft" rel="nofollow">weblab-10b-instruct</a> (Japanese)</li> <li>nolando.ai's <a href="https://blog.nolano.ai/Hi-NOLIN/" rel="nofollow">Hi-NOLIN (9B)</a> (English, Hindi)</li> <li>Renmin University of China's <a href="https://huggingface.co/yulan-team/YuLan-Base-12b" rel="nofollow">YuLan (12B)</a> (English, Chinese)</li> <li>The Basque Center for Language Technology's <a href="https://huggingface.co/HiTZ/latxa-70b-v1.2" rel="nofollow">Latixna (70B)</a> (Basque)</li> </ul> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Code Models</h3><a id="user-content-code-models" 
class="anchor" aria-label="Permalink: Code Models" href="#code-models"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Carnegie Mellon University's <a href="https://github.com/VHellendoorn/Code-LMs">PolyCoder (160M through 2.7B)</a> and <a href="https://huggingface.co/nikitharao/catlm" rel="nofollow">CAT-LM (2.7B)</a></li> <li>StabilityAI's <a href="https://stability.ai/blog/stablecode-llm-generative-ai-coding" rel="nofollow">StableCode (1.3B)</a> and <a href="https://stability.ai/blog/stablecode-llm-generative-ai-coding" rel="nofollow">StableCode-Completion-Alpha (3B)</a></li> <li>CodeFuse AI's <a href="https://huggingface.co/codefuse-ai/CodeFuse-13B" rel="nofollow">CodeFuse (13B)</a></li> </ul> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">AI for Science</h3><a id="user-content-ai-for-science" class="anchor" aria-label="Permalink: AI for Science" href="#ai-for-science"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>EleutherAI's <a href="https://arxiv.org/abs/2310.10631" rel="nofollow">LLeMMA (34B)</a></li> <li>Oak Ridge National Lab's <a href="https://github.com/at-aaims/forge">FORGE (26B)</a></li> <li>Oak Ridge National Lab's <a href="https://arxiv.org/abs/2402.00691" rel="nofollow">Unnamed Material Science Domain Models (7B)</a></li> <li>Pacific Northwest National Lab's <a href="https://openreview.net/pdf?id=7UudBVsIrr" rel="nofollow">MolJet (undisclosed size)</a></li> </ul> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Other Modalities</h3><a id="user-content-other-modalities" class="anchor" aria-label="Permalink: Other Modalities" href="#other-modalities"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 
0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Rinna Co.'s <a href="https://arxiv.org/abs/2406.12428" rel="nofollow">PSLM (7B)</a> (speech / text)</li> <li>University College London's <a href="https://huggingface.co/Waterhorse/chessgpt-base-v1" rel="nofollow">ChessGPT-3B</a></li> <li>Gretel's <a href="https://huggingface.co/gretelai/text2table" rel="nofollow">Text-to-Table (3B)</a></li> </ul> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Administrative Notes</h1><a id="user-content-administrative-notes" class="anchor" aria-label="Permalink: Administrative Notes" href="#administrative-notes"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Citing GPT-NeoX</h2><a id="user-content-citing-gpt-neox" class="anchor" aria-label="Permalink: Citing GPT-NeoX" href="#citing-gpt-neox"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">If you have found the GPT-NeoX library helpful in your work, you can cite this repository as</p> <div class="highlight highlight-text-bibtex notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="@software{gpt-neox-library, title = {{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}}, author = {Andonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Phang, Jason and Purohit, Shivanshu and Schoelkopf, Hailey and Stander, Dashiell and Songz, Tri and Tigges, Curt and Thérien, Benjamin and Wang, Phil and Weinbach, Samuel}, url = {https://www.github.com/eleutherai/gpt-neox}, doi = {10.5281/zenodo.5879544}, month = {9}, year = {2023}, version = {2.0.0}, }"><pre><span class="pl-k">@software</span>{<span class="pl-en">gpt-neox-library</span>, <span class="pl-s">title</span> = <span class="pl-s"><span class="pl-pds">{</span>{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}<span class="pl-pds">}</span></span>, <span class="pl-s">author</span> = <span class="pl-s"><span class="pl-pds">{</span>Andonian, Alex and Anthony, Quentin and Biderman, 
Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Phang, Jason and Purohit, Shivanshu and Schoelkopf, Hailey and Stander, Dashiell and Songz, Tri and Tigges, Curt and Thérien, Benjamin and Wang, Phil and Weinbach, Samuel<span class="pl-pds">}</span></span>, <span class="pl-s">url</span> = <span class="pl-s"><span class="pl-pds">{</span>https://www.github.com/eleutherai/gpt-neox<span class="pl-pds">}</span></span>, <span class="pl-s">doi</span> = <span class="pl-s"><span class="pl-pds">{</span>10.5281/zenodo.5879544<span class="pl-pds">}</span></span>, <span class="pl-s">month</span> = <span class="pl-s"><span class="pl-pds">{</span>9<span class="pl-pds">}</span></span>, <span class="pl-s">year</span> = <span class="pl-s"><span class="pl-pds">{</span>2023<span class="pl-pds">}</span></span>, <span class="pl-s">version</span> = <span class="pl-s"><span class="pl-pds">{</span>2.0.0<span class="pl-pds">}</span></span>, }</pre></div> <p dir="auto">To cite the 20 billion parameter model named <code>GPT-NeoX-20B</code>, please use</p> <div class="highlight highlight-text-bibtex notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="@inproceedings{gpt-neox-20b, title={{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model}, author={Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel}, booktitle={Proceedings of the ACL Workshop on Challenges \&amp; Perspectives in Creating Large Language Models}, url={https://arxiv.org/abs/2204.06745}, year={2022} }"><pre><span class="pl-k">@inproceedings</span>{<span class="pl-en">gpt-neox-20b</span>, <span class="pl-s">title</span>=<span class="pl-s"><span class="pl-pds">{</span>{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model<span class="pl-pds">}</span></span>, <span class="pl-s">author</span>=<span class="pl-s"><span class="pl-pds">{</span>Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel<span class="pl-pds">}</span></span>, <span class="pl-s">booktitle</span>=<span class="pl-s"><span class="pl-pds">{</span>Proceedings of the ACL Workshop on Challenges \&amp; Perspectives in Creating Large Language Models<span class="pl-pds">}</span></span>, <span class="pl-s">url</span>=<span class="pl-s"><span class="pl-pds">{</span>https://arxiv.org/abs/2204.06745<span class="pl-pds">}</span></span>, <span class="pl-s">year</span>=<span class="pl-s"><span class="pl-pds">{</span>2022<span class="pl-pds">}</span></span> }</pre></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Contributing</h2><a id="user-content-contributing" class="anchor" aria-label="Permalink: Contributing" href="#contributing"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 
1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">GPT-NeoX is built by the open-source AI community, and relies on our amazing contributors! Please see our <a href="/EleutherAI/gpt-neox/blob/main/CONTRIBUTING.md">contributing</a> guide for more details on our CLA, code formatting, testing, etc.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Licensing</h2><a id="user-content-licensing" class="anchor" aria-label="Permalink: Licensing" href="#licensing"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">This repository hosts code that is part of EleutherAI's GPT-NeoX project. Copyright (c) 2024, EleutherAI. Licensed under the Apache License:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="Licensed under the Apache License, Version 2.0 (the &quot;License&quot;); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."><pre class="notranslate"><code>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. </code></pre></div> <p dir="auto">This repository is based off code written by NVIDIA that is licensed under the Apache License, Version 2.0. In accordance with the Apache License, all files that are modifications of code originally written by NVIDIA maintain a NVIDIA copyright header. All files that do not contain such a header are the exclusive copyright of EleutherAI. When the NVIDIA code has been modified from its original version, that fact is noted in the copyright header. 
All derivative works of this repository must preserve these headers under the terms of the Apache License.</p> <p dir="auto">This repository also contains code written by a number of other authors. Such contributions are marked and the relevant licensing is included where appropriate.</p> <p dir="auto">For full terms, see the <code>LICENSE</code> file. If you have any questions, comments, or concerns about licensing please email us at <a href="mailto:contact@eleuther.ai">contact@eleuther.ai</a>.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Acknowledgements</h2><a id="user-content-acknowledgements" class="anchor" aria-label="Permalink: Acknowledgements" href="#acknowledgements"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We run our experiments on a Kubernetes cluster provided by <a href="https://coreweave.com/" rel="nofollow">CoreWeave</a> and a Slurm cluster provided by <a href="https://stability.ai" rel="nofollow">Stability AI</a>. We are thankful to the DeepSpeed team for their advice and consultation.</p> </article>
</a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/jahatef" class="" data-hovercard-type="user" data-hovercard-url="/users/jahatef/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/74274091?s=64&amp;v=4" alt="@jahatef" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/sweinbach" class="" data-hovercard-type="user" data-hovercard-url="/users/sweinbach/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/18198364?s=64&amp;v=4" alt="@sweinbach" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/dmahan93" class="" data-hovercard-type="user" data-hovercard-url="/users/dmahan93/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/44207705?s=64&amp;v=4" alt="@dmahan93" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> </ul> <div data-view-component="true" class="mt-3"> <a text="small" href="/EleutherAI/gpt-neox/graphs/contributors" data-view-component="true" class="Link--inTextBlock Link">+ 97 contributors</a></div> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3">Languages</h2> <div class="mb-2"> <span data-view-component="true" class="Progress"> <span style="background-color:#3572A5 !important;;width: 85.0%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#f34b7d !important;;width: 11.5%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#3A4E3A !important;;width: 2.6%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#89e051 !important;;width: 0.5%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#384d54 !important;;width: 0.3%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#555555 !important;;width: 0.1%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> </span></div> <ul class="list-style-none"> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=python" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#3572A5;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Python</span> <span>85.0%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=c%2B%2B" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#f34b7d;" aria-hidden="true" height="16" viewBox="0 0 16 
16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">C++</span> <span>11.5%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=cuda" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#3A4E3A;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Cuda</span> <span>2.6%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=shell" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#89e051;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Shell</span> <span>0.5%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=dockerfile" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#384d54;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Dockerfile</span> <span>0.3%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/EleutherAI/gpt-neox/search?l=c" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#555555;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">C</span> <span>0.1%</span> </a> </li> </ul> </div> </div> </div> </div> </div></div> </div> </div> </turbo-frame> </main> </div> </div> <footer class="footer pt-8 pb-6 f6 color-fg-muted p-responsive" role="contentinfo" > <h2 class='sr-only'>Footer</h2> <div class="d-flex flex-justify-center flex-items-center flex-column-reverse flex-lg-row flex-wrap flex-lg-nowrap"> <div class="d-flex flex-items-center flex-shrink-0 mx-2"> <a aria-label="Homepage" title="GitHub" class="footer-octicon mr-2" href="https://github.com"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-mark-github"> <path d="M12 1C5.9225 1 1 5.9225 1 12C1 16.8675 4.14875 20.9787 8.52125 22.4362C9.07125 22.5325 9.2775 22.2025 9.2775 21.9137C9.2775 21.6525 9.26375 20.7862 9.26375 19.865C6.5 20.3737 5.785 19.1912 5.565 18.5725C5.44125 18.2562 4.905 17.28 4.4375 17.0187C4.0525 16.8125 3.5025 16.3037 4.42375 16.29C5.29 16.2762 5.90875 17.0875 6.115 17.4175C7.105 19.0812 8.68625 18.6137 9.31875 18.325C9.415 17.61 9.70375 17.1287 10.02 
