CINXE.COM
GPT-2 - Wikipedia
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<title>GPT-2 - Wikipedia</title>
<script>
// Bootstrap script, part 1: rebuild the class list of the root element.
// Starts from the same class list as the <html> tag but with "client-js" in
// place of "client-nojs". If an "enwikimwclientpreferences" cookie is present,
// its value is split on "%2C" and each entry replaces the class token that has
// the same prefix followed by "-clientpref-<word>". The result is assigned to
// document.documentElement.className.
// NOTE: every string literal in this script must stay on a single line; a raw
// line break inside a quoted JavaScript string is a syntax error that prevents
// the whole <script> element from running.
(function(){
var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available";
var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);
if(cookie){
cookie[1].split('%2C').forEach(function(pref){
// Strip any "-clientpref-<word>" suffix and non-word characters from the entry
// before building the pattern, then swap the matching class token for the entry.
className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');
});
}
document.documentElement.className=className;
}());
// Bootstrap script, part 2: page configuration globals (RLCONF), module load
// state (RLSTATE) and the list of modules requested for this page (RLPAGEMODULES).
RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy",
"wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],
"wgRequestId":"d655e37e-5095-492d-9a75-990cbbb15068","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,
"wgPageName":"GPT-2","wgTitle":"GPT-2","wgCurRevisionId":1278972269,"wgRevisionId":1278972269,"wgArticleId":66045029,
"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],
"wgCategories":["CS1 maint: numeric names: authors list","Articles with short description","Short description is different from Wikidata","All articles with unsourced statements","Articles with unsourced statements from June 2024","Large language models","Generative pre-trained transformers","Software using the MIT license","OpenAI"],
"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"GPT-2","wgRelevantArticleId":66045029,
"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],
"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},
"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,
"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},
"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},
"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":40000,"wgEditSubmitButtonLabelPublish":true,
"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,
"wgWikibaseItemId":"Q95726727",
"wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],
"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};
RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading",
"ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready",
"jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready",
"ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};
RLPAGEMODULES=["ext.cite.ux-enhancements","mediawiki.page.media","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js",
"ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar",
"ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader",
"ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns",
"ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession"];
</script>
<script>
// Queues a callback on RLQ (created here if absent) that registers the
// "user.options@12s5i" module implementation; that implementation sets the
// patrol, watch and CSRF tokens on mw.user.tokens.
(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});
</script>
<link rel="stylesheet"
href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.20"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/1200px-GPT2-talks-about-GPT2.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="595"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/800px-GPT2-talks-about-GPT2.png"> <meta property="og:image:width" content="800"> <meta property="og:image:height" content="397"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/640px-GPT2-talks-about-GPT2.png"> <meta property="og:image:width" content="640"> <meta property="og:image:height" content="317"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="GPT-2 - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/GPT-2"> <link rel="alternate" type="application/x-wiki" title="Edit this page" 
href="/w/index.php?title=GPT-2&action=edit">
<link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png">
<link rel="icon" href="/static/favicon/wikipedia.ico">
<link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)">
<link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd">
<link rel="canonical" href="https://en.wikipedia.org/wiki/GPT-2">
<link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en">
<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom">
<!-- DNS prefetch hints. Each href must name a host ("//host"); a bare
     "login.wikimedia.org" is resolved as a path relative to this page, so the
     hint would never reach the intended hostname. -->
<link rel="dns-prefetch" href="//meta.wikimedia.org">
<link rel="dns-prefetch" href="//login.wikimedia.org">
</head>
<body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-GPT-2 rootpage-GPT-2 skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a>
<!-- Site header. Opens with the "Site" navigation landmark, which holds the
     main-menu dropdown: a checkbox input plus the label bound to it via
     for="vector-main-menu-dropdown-checkbox", followed by the dropdown content. -->
<div class="vector-header-container">
  <header class="vector-header mw-header">
    <div class="vector-header-start">
      <nav class="vector-main-menu-landmark" aria-label="Site">
        <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" title="Main menu" >
          <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" >
          <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span>
            <span class="vector-dropdown-label-text">Main menu</span>
          </label>
          <div
class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to 
contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li><li id="n-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages"><span>Special pages</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width 
vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" 
data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/?wmf_source=donate&wmf_medium=sidebar&wmf_campaign=en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=GPT-2" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=GPT-2" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/?wmf_source=donate&wmf_medium=sidebar&wmf_campaign=en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=GPT-2" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=GPT-2" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Training" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Training"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Training</span> </div> </a> <ul id="toc-Training-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Release" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Release"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Release</span> </div> </a> <button aria-controls="toc-Release-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Release subsection</span> </button> <ul id="toc-Release-sublist" class="vector-toc-list"> <li id="toc-Restrictions_and_partial_release" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Restrictions_and_partial_release"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Restrictions and partial release</span> </div> </a> <ul id="toc-Restrictions_and_partial_release-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-774M_release" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#774M_release"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>774M release</span> </div> </a> <ul id="toc-774M_release-sublist" class="vector-toc-list"> </ul> 
</li> <li id="toc-Full_1.5B_release" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Full_1.5B_release"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Full 1.5B release</span> </div> </a> <ul id="toc-Full_1.5B_release-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Small_and_Medium_Releases" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Small_and_Medium_Releases"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.4</span> <span>Small and Medium Releases</span> </div> </a> <ul id="toc-Small_and_Medium_Releases-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Limitations" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Limitations"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Limitations</span> </div> </a> <ul id="toc-Limitations-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Application_and_subsequent_research" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Application_and_subsequent_research"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Application and subsequent research</span> </div> </a> <ul id="toc-Application_and_subsequent_research-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Performance_and_evaluation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Performance_and_evaluation"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Performance and evaluation</span> </div> </a> <ul id="toc-Performance_and_evaluation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div 
class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" title="Table of Contents" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">GPT-2</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
Available in 13 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-13" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">13 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/GPT-2" title="GPT-2 – Catalan" lang="ca" hreflang="ca" data-title="GPT-2" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/GPT-2" title="GPT-2 – Spanish" lang="es" hreflang="es" data-title="GPT-2" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%AC%DB%8C%E2%80%8C%D9%BE%DB%8C%E2%80%8C%D8%AA%DB%8C_%DB%B2" title="جیپیتی ۲ – Persian" lang="fa" hreflang="fa" data-title="جیپیتی ۲" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/GPT-2" title="GPT-2 – Korean" lang="ko" hreflang="ko" data-title="GPT-2" data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-hi mw-list-item"><a href="https://hi.wikipedia.org/wiki/%E0%A4%9C%E0%A5%80%E0%A4%AA%E0%A5%80%E0%A4%9F%E0%A5%80-2" title="जीपीटी-2 – Hindi" 
lang="hi" hreflang="hi" data-title="जीपीटी-2" data-language-autonym="हिन्दी" data-language-local-name="Hindi" class="interlanguage-link-target"><span>हिन्दी</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/GPT-2" title="GPT-2 – Japanese" lang="ja" hreflang="ja" data-title="GPT-2" data-language-autonym="日本語" data-language-local-name="Japanese" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/GPT-2" title="GPT-2 – Portuguese" lang="pt" hreflang="pt" data-title="GPT-2" data-language-autonym="Português" data-language-local-name="Portuguese" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link interwiki-kaa mw-list-item"><a href="https://kaa.wikipedia.org/wiki/GPT-2" title="GPT-2 – Kara-Kalpak" lang="kaa" hreflang="kaa" data-title="GPT-2" data-language-autonym="Qaraqalpaqsha" data-language-local-name="Kara-Kalpak" class="interlanguage-link-target"><span>Qaraqalpaqsha</span></a></li><li class="interlanguage-link interwiki-fi mw-list-item"><a href="https://fi.wikipedia.org/wiki/GPT-2" title="GPT-2 – Finnish" lang="fi" hreflang="fi" data-title="GPT-2" data-language-autonym="Suomi" data-language-local-name="Finnish" class="interlanguage-link-target"><span>Suomi</span></a></li><li class="interlanguage-link interwiki-sv mw-list-item"><a href="https://sv.wikipedia.org/wiki/GPT-2" title="GPT-2 – Swedish" lang="sv" hreflang="sv" data-title="GPT-2" data-language-autonym="Svenska" data-language-local-name="Swedish" class="interlanguage-link-target"><span>Svenska</span></a></li><li class="interlanguage-link interwiki-uk badge-Q70893996 mw-list-item" title=""><a href="https://uk.wikipedia.org/wiki/GPT-2" title="GPT-2 – Ukrainian" lang="uk" hreflang="uk" data-title="GPT-2" data-language-autonym="Українська" data-language-local-name="Ukrainian" 
class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/GPT-2" title="GPT-2 – Cantonese" lang="yue" hreflang="yue" data-title="GPT-2" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/GPT-2" title="GPT-2 – Chinese" lang="zh" hreflang="zh" data-title="GPT-2" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q95726727#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/GPT-2" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:GPT-2" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label 
id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/GPT-2"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=GPT-2&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=GPT-2&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div 
id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/GPT-2"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=GPT-2&action=edit" title="Edit this page [e]"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=GPT-2&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/GPT-2" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links 
here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/GPT-2" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=GPT-2&oldid=1278972269" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=GPT-2&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=GPT-2&id=1278972269&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FGPT-2"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FGPT-2"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=GPT-2&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=GPT-2&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> 
</ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q95726727" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" 
class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">2019 text-generating language model</div> <style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">See also: <a href="/wiki/Generative_pre-trained_transformer#Foundation_models" title="Generative pre-trained transformer">Generative pre-trained transformer § Foundation models</a></div> <style data-mw-deduplicate="TemplateStyles:r1257001546">.mw-parser-output .infobox-subbox{padding:0;border:none;margin:-3px;width:auto;min-width:100%;font-size:100%;clear:none;float:none;background-color:transparent}.mw-parser-output .infobox-3cols-child{margin:auto}.mw-parser-output .infobox .navbar{font-size:100%}@media screen{html.skin-theme-clientpref-night .mw-parser-output .infobox-full-data:not(.notheme)>div:not(.notheme)[style]{background:#1f1f23!important;color:#f8f9fa}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .infobox-full-data:not(.notheme) div:not(.notheme){background:#1f1f23!important;color:#f8f9fa}}@media(min-width:640px){body.skin--responsive .mw-parser-output .infobox-table{display:table!important}body.skin--responsive .mw-parser-output .infobox-table>caption{display:table-caption!important}body.skin--responsive .mw-parser-output .infobox-table>tbody{display:table-row-group}body.skin--responsive .mw-parser-output .infobox-table 
tr{display:table-row!important}body.skin--responsive .mw-parser-output .infobox-table th,body.skin--responsive .mw-parser-output .infobox-table td{padding-left:inherit;padding-right:inherit}}</style><table class="infobox vevent"><caption class="infobox-title summary">Generative Pre-trained Transformer 2 (GPT-2)</caption><tbody><tr><td colspan="2" class="infobox-image logo"><span class="mw-default-size" typeof="mw:File/Frameless"><a href="/wiki/File:GPT2-talks-about-GPT2.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/220px-GPT2-talks-about-GPT2.png" alt="Screenshot of a GPT-2 text completion on the Write With Transformer website" decoding="async" width="220" height="109" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/330px-GPT2-talks-about-GPT2.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/a/ad/GPT2-talks-about-GPT2.png/440px-GPT2-talks-about-GPT2.png 2x" data-file-width="1902" data-file-height="943" /></a></span><div class="infobox-caption">GPT-2 completion using the <a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a> Write With Transformer website, prompted with text from this article (All highlighted text after the initial prompt is machine-generated from the first suggested completion, without further editing.)</div></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Programmer" title="Programmer">Original author(s)</a></th><td class="infobox-data"><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Initial release</th><td class="infobox-data">14 February 2019<span class="noprint">; 6 years ago</span><span style="display:none"> (<span class="bday dtstart published updated">14 February 2019</span>)</span></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Repository_(version_control)" title="Repository (version 
control)">Repository</a></th><td class="infobox-data"><a rel="nofollow" class="external free" href="https://github.com/openai/gpt-2">https://github.com/openai/gpt-2</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Predecessor</th><td class="infobox-data"><a href="/wiki/GPT-1" title="GPT-1">GPT-1</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Successor</th><td class="infobox-data"><a href="/wiki/GPT-3" title="GPT-3">GPT-3</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_categories#Categorization_approaches" title="Software categories">Type</a></th><td class="infobox-data"><style data-mw-deduplicate="TemplateStyles:r1126788409">.mw-parser-output .plainlist ol,.mw-parser-output .plainlist ul{line-height:inherit;list-style:none;margin:0;padding:0}.mw-parser-output .plainlist ol li,.mw-parser-output .plainlist ul li{margin-bottom:0}</style><div class="plainlist" style="margin-left:1em;text-indent:-1em;"> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">Generative pre-trained transformer</a></li></ul> </div></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_license" title="Software license">License</a></th><td class="infobox-data"><a href="/wiki/MIT_license" class="mw-redirect" title="MIT license">MIT</a><sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Website</th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://openai.com/blog/gpt-2-1-5b-release/">openai<wbr />.com<wbr />/blog<wbr />/gpt-2-1-5b-release<wbr 
/>/</a></span></td></tr></tbody></table> <style data-mw-deduplicate="TemplateStyles:r1244144826">.mw-parser-output .machine-learning-list-title{background-color:#ddddff}html.skin-theme-clientpref-night .mw-parser-output .machine-learning-list-title{background-color:#222}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .machine-learning-list-title{background-color:#222}}</style> <style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd 
dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1246091330">.mw-parser-output .sidebar{width:22em;float:right;clear:right;margin:0.5em 0 1em 1em;background:var(--background-color-neutral-subtle,#f8f9fa);border:1px solid var(--border-color-base,#a2a9b1);padding:0.2em;text-align:center;line-height:1.4em;font-size:88%;border-collapse:collapse;display:table}body.skin-minerva .mw-parser-output .sidebar{display:table!important;float:right!important;margin:0.5em 0 1em 1em!important}.mw-parser-output .sidebar-subgroup{width:100%;margin:0;border-spacing:0}.mw-parser-output .sidebar-left{float:left;clear:left;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-none{float:none;clear:both;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-outer-title{padding:0 0.4em 0.2em;font-size:125%;line-height:1.2em;font-weight:bold}.mw-parser-output .sidebar-top-image{padding:0.4em}.mw-parser-output .sidebar-top-caption,.mw-parser-output .sidebar-pretitle-with-top-image,.mw-parser-output .sidebar-caption{padding:0.2em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-pretitle{padding:0.4em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-title,.mw-parser-output 
.sidebar-title-with-pretitle{padding:0.2em 0.8em;font-size:145%;line-height:1.2em}.mw-parser-output .sidebar-title-with-pretitle{padding:0.1em 0.4em}.mw-parser-output .sidebar-image{padding:0.2em 0.4em 0.4em}.mw-parser-output .sidebar-heading{padding:0.1em 0.4em}.mw-parser-output .sidebar-content{padding:0 0.5em 0.4em}.mw-parser-output .sidebar-content-with-subgroup{padding:0.1em 0.4em 0.2em}.mw-parser-output .sidebar-above,.mw-parser-output .sidebar-below{padding:0.3em 0.8em;font-weight:bold}.mw-parser-output .sidebar-collapse .sidebar-above,.mw-parser-output .sidebar-collapse .sidebar-below{border-top:1px solid #aaa;border-bottom:1px solid #aaa}.mw-parser-output .sidebar-navbar{text-align:right;font-size:115%;padding:0 0.4em 0.4em}.mw-parser-output .sidebar-list-title{padding:0 0.4em;text-align:left;font-weight:bold;line-height:1.6em;font-size:105%}.mw-parser-output .sidebar-list-title-c{padding:0 0.4em;text-align:center;margin:0 3.3em}@media(max-width:640px){body.mediawiki .mw-parser-output .sidebar{width:100%!important;clear:both;float:none!important;margin-left:0!important;margin-right:0!important}}body.skin--responsive .mw-parser-output .sidebar a>img{max-width:none!important}@media screen{html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle 
a{color:var(--color-progressive)!important}}@media print{body.ns-0 .mw-parser-output .sidebar{display:none!important}}</style><style data-mw-deduplicate="TemplateStyles:r886047488">.mw-parser-output .nobold{font-weight:normal}</style><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r886047488" /><table class="sidebar sidebar-collapse nomobile nowraplinks"><tbody><tr><td class="sidebar-pretitle">Part of a series on</td></tr><tr><th class="sidebar-title-with-pretitle"><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a><br />and <a href="/wiki/Data_mining" title="Data mining">data mining</a></th></tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Paradigms</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a></li> <li><a href="/wiki/Unsupervised_learning" title="Unsupervised learning">Unsupervised learning</a></li> <li><a href="/wiki/Semi-supervised_learning" class="mw-redirect" title="Semi-supervised learning">Semi-supervised learning</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></li> <li><a href="/wiki/Meta-learning_(computer_science)" title="Meta-learning (computer science)">Meta-learning</a></li> <li><a href="/wiki/Online_machine_learning" title="Online machine learning">Online learning</a></li> <li><a href="/wiki/Batch_learning" class="mw-redirect" title="Batch learning">Batch learning</a></li> <li><a href="/wiki/Curriculum_learning" title="Curriculum learning">Curriculum learning</a></li> <li><a href="/wiki/Rule-based_machine_learning" title="Rule-based 
machine learning">Rule-based learning</a></li> <li><a href="/wiki/Neuro-symbolic_AI" title="Neuro-symbolic AI">Neuro-symbolic AI</a></li> <li><a href="/wiki/Neuromorphic_engineering" class="mw-redirect" title="Neuromorphic engineering">Neuromorphic engineering</a></li> <li><a href="/wiki/Quantum_machine_learning" title="Quantum machine learning">Quantum machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Problems</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Statistical_classification" title="Statistical classification">Classification</a></li> <li><a href="/wiki/Generative_model" title="Generative model">Generative modeling</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></li> <li><a href="/wiki/Density_estimation" title="Density estimation">Density estimation</a></li> <li><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></li> <li><a href="/wiki/Data_cleaning" class="mw-redirect" title="Data cleaning">Data cleaning</a></li> <li><a href="/wiki/Automated_machine_learning" title="Automated machine learning">AutoML</a></li> <li><a href="/wiki/Association_rule_learning" title="Association rule learning">Association rules</a></li> <li><a href="/wiki/Semantic_analysis_(machine_learning)" title="Semantic analysis (machine learning)">Semantic analysis</a></li> <li><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></li> <li><a href="/wiki/Feature_engineering" title="Feature engineering">Feature 
engineering</a></li> <li><a href="/wiki/Feature_learning" title="Feature learning">Feature learning</a></li> <li><a href="/wiki/Learning_to_rank" title="Learning to rank">Learning to rank</a></li> <li><a href="/wiki/Grammar_induction" title="Grammar induction">Grammar induction</a></li> <li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li> <li><a href="/wiki/Multimodal_learning" title="Multimodal learning">Multimodal learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><div style="display: inline-block; line-height: 1.2em; padding: .1em 0;"><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a><br /><span class="nobold"><span style="font-size:85%;">(<b><a href="/wiki/Statistical_classification" title="Statistical classification">classification</a></b> • <b><a href="/wiki/Regression_analysis" title="Regression analysis">regression</a></b>)</span></span> </div></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Apprenticeship_learning" title="Apprenticeship learning">Apprenticeship learning</a></li> <li><a href="/wiki/Decision_tree_learning" title="Decision tree learning">Decision trees</a></li> <li><a href="/wiki/Ensemble_learning" title="Ensemble learning">Ensembles</a> <ul><li><a href="/wiki/Bootstrap_aggregating" title="Bootstrap aggregating">Bagging</a></li> <li><a href="/wiki/Boosting_(machine_learning)" title="Boosting (machine learning)">Boosting</a></li> <li><a href="/wiki/Random_forest" title="Random forest">Random forest</a></li></ul></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Linear_regression" title="Linear regression">Linear 
regression</a></li> <li><a href="/wiki/Naive_Bayes_classifier" title="Naive Bayes classifier">Naive Bayes</a></li> <li><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural networks</a></li> <li><a href="/wiki/Logistic_regression" title="Logistic regression">Logistic regression</a></li> <li><a href="/wiki/Perceptron" title="Perceptron">Perceptron</a></li> <li><a href="/wiki/Relevance_vector_machine" title="Relevance vector machine">Relevance vector machine (RVM)</a></li> <li><a href="/wiki/Support_vector_machine" title="Support vector machine">Support vector machine (SVM)</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/BIRCH" title="BIRCH">BIRCH</a></li> <li><a href="/wiki/CURE_algorithm" title="CURE algorithm">CURE</a></li> <li><a href="/wiki/Hierarchical_clustering" title="Hierarchical clustering">Hierarchical</a></li> <li><a href="/wiki/K-means_clustering" title="K-means clustering"><i>k</i>-means</a></li> <li><a href="/wiki/Fuzzy_clustering" title="Fuzzy clustering">Fuzzy</a></li> <li><a href="/wiki/Expectation%E2%80%93maximization_algorithm" title="Expectation–maximization algorithm">Expectation–maximization (EM)</a></li> <li><br /><a href="/wiki/DBSCAN" title="DBSCAN">DBSCAN</a></li> <li><a href="/wiki/OPTICS_algorithm" title="OPTICS algorithm">OPTICS</a></li> <li><a href="/wiki/Mean_shift" title="Mean shift">Mean shift</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid 
#aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Factor_analysis" title="Factor analysis">Factor analysis</a></li> <li><a href="/wiki/Canonical_correlation" title="Canonical correlation">CCA</a></li> <li><a href="/wiki/Independent_component_analysis" title="Independent component analysis">ICA</a></li> <li><a href="/wiki/Linear_discriminant_analysis" title="Linear discriminant analysis">LDA</a></li> <li><a href="/wiki/Non-negative_matrix_factorization" title="Non-negative matrix factorization">NMF</a></li> <li><a href="/wiki/Principal_component_analysis" title="Principal component analysis">PCA</a></li> <li><a href="/wiki/Proper_generalized_decomposition" title="Proper generalized decomposition">PGD</a></li> <li><a href="/wiki/T-distributed_stochastic_neighbor_embedding" title="T-distributed stochastic neighbor embedding">t-SNE</a></li> <li><a href="/wiki/Sparse_dictionary_learning" title="Sparse dictionary learning">SDL</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Graphical_model" title="Graphical model">Graphical models</a> <ul><li><a href="/wiki/Bayesian_network" title="Bayesian network">Bayes net</a></li> <li><a href="/wiki/Conditional_random_field" title="Conditional random field">Conditional random field</a></li> <li><a href="/wiki/Hidden_Markov_model" title="Hidden Markov model">Hidden Markov</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> 
<div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Random_sample_consensus" title="Random sample consensus">RANSAC</a></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Local_outlier_factor" title="Local outlier factor">Local outlier factor</a></li> <li><a href="/wiki/Isolation_forest" title="Isolation forest">Isolation forest</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural network</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">Feedforward neural network</a></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network</a> <ul><li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">LSTM</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">GRU</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">ESN</a></li> <li><a href="/wiki/Reservoir_computing" title="Reservoir computing">reservoir computing</a></li></ul></li> <li><a href="/wiki/Boltzmann_machine" title="Boltzmann machine">Boltzmann machine</a> 
<ul><li><a href="/wiki/Restricted_Boltzmann_machine" title="Restricted Boltzmann machine">Restricted</a></li></ul></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">GAN</a></li> <li><a href="/wiki/Diffusion_model" title="Diffusion model">Diffusion model</a></li> <li><a href="/wiki/Self-organizing_map" title="Self-organizing map">SOM</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network</a> <ul><li><a href="/wiki/U-Net" title="U-Net">U-Net</a></li> <li><a href="/wiki/LeNet" title="LeNet">LeNet</a></li> <li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/DeepDream" title="DeepDream">DeepDream</a></li></ul></li> <li><a href="/wiki/Neural_radiance_field" title="Neural radiance field">Neural radiance field</a></li> <li><a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision</a></li></ul></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Spiking_neural_network" title="Spiking neural network">Spiking neural network</a></li> <li><a href="/wiki/Memtransistor" title="Memtransistor">Memtransistor</a></li> <li><a href="/wiki/Electrochemical_RAM" title="Electrochemical RAM">Electrochemical RAM</a> (ECRAM)</li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a 
href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Temporal_difference_learning" title="Temporal difference learning">Temporal difference (TD)</a></li> <li><a href="/wiki/Multi-agent_reinforcement_learning" title="Multi-agent reinforcement learning">Multi-agent</a> <ul><li><a href="/wiki/Self-play_(reinforcement_learning_technique)" class="mw-redirect" title="Self-play (reinforcement learning technique)">Self-play</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Learning with humans</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Active_learning_(machine_learning)" title="Active learning (machine learning)">Active learning</a></li> <li><a href="/wiki/Crowdsourcing" title="Crowdsourcing">Crowdsourcing</a></li> <li><a href="/wiki/Human-in-the-loop" title="Human-in-the-loop">Human-in-the-loop</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Model diagnostics</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Coefficient_of_determination" title="Coefficient of determination">Coefficient of determination</a></li> <li><a href="/wiki/Confusion_matrix" title="Confusion matrix">Confusion matrix</a></li> <li><a href="/wiki/Learning_curve_(machine_learning)" title="Learning curve (machine learning)">Learning curve</a></li> <li><a 
href="/wiki/Receiver_operating_characteristic" title="Receiver operating characteristic">ROC curve</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Mathematical foundations</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Kernel_machines" class="mw-redirect" title="Kernel machines">Kernel machines</a></li> <li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Computational_learning_theory" title="Computational learning theory">Computational learning theory</a></li> <li><a href="/wiki/Empirical_risk_minimization" title="Empirical risk minimization">Empirical risk minimization</a></li> <li><a href="/wiki/Occam_learning" title="Occam learning">Occam learning</a></li> <li><a href="/wiki/Probably_approximately_correct_learning" title="Probably approximately correct learning">PAC learning</a></li> <li><a href="/wiki/Statistical_learning_theory" title="Statistical learning theory">Statistical learning</a></li> <li><a href="/wiki/Vapnik%E2%80%93Chervonenkis_theory" title="Vapnik–Chervonenkis theory">VC theory</a></li> <li><a href="/wiki/Topological_deep_learning" title="Topological deep learning">Topological deep learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Journals and conferences</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/ECML_PKDD" title="ECML PKDD">ECML PKDD</a></li> <li><a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural 
Information Processing Systems">NeurIPS</a></li> <li><a href="/wiki/International_Conference_on_Machine_Learning" title="International Conference on Machine Learning">ICML</a></li> <li><a href="/wiki/International_Conference_on_Learning_Representations" title="International Conference on Learning Representations">ICLR</a></li> <li><a href="/wiki/International_Joint_Conference_on_Artificial_Intelligence" title="International Joint Conference on Artificial Intelligence">IJCAI</a></li> <li><a href="/wiki/Machine_Learning_(journal)" title="Machine Learning (journal)">ML</a></li> <li><a href="/wiki/Journal_of_Machine_Learning_Research" title="Journal of Machine Learning Research">JMLR</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Related articles</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Glossary_of_artificial_intelligence" title="Glossary of artificial intelligence">Glossary of artificial intelligence</a></li> <li><a href="/wiki/List_of_datasets_for_machine-learning_research" title="List of datasets for machine-learning research">List of datasets for machine-learning research</a> <ul><li><a href="/wiki/List_of_datasets_in_computer_vision_and_image_processing" title="List of datasets in computer vision and image processing">List of datasets in computer vision and image processing</a></li></ul></li> <li><a href="/wiki/Outline_of_machine_learning" title="Outline of machine learning">Outline of machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-navbar"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output 
.navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Machine_learning" title="Template:Machine learning"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Machine_learning" title="Template talk:Machine learning"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Machine_learning" title="Special:EditPage/Template:Machine learning"><abbr title="Edit this template">e</abbr></a></li></ul></div></td></tr></tbody></table> <p><b>Generative Pre-trained Transformer 2</b> (<b>GPT-2</b>) is a <a href="/wiki/Large_language_model" title="Large language model">large language model</a> by <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a> and the second in their foundational series of <a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> models. 
GPT-2 was pre-trained on a dataset of 8 million web pages.<sup id="cite_ref-gpt2paper_2-0" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> It was partially released in February 2019, followed by full release of the 1.5-billion-parameter model on November 5, 2019.<sup id="cite_ref-verge2_3-0" class="reference"><a href="#cite_note-verge2-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-15Brelease_4-0" class="reference"><a href="#cite_note-15Brelease-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-openai_5-0" class="reference"><a href="#cite_note-openai-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> </p><p>GPT-2 was created as a "direct scale-up" of <a href="/wiki/GPT-1" title="GPT-1">GPT-1</a><sup id="cite_ref-gpt1paper_6-0" class="reference"><a href="#cite_note-gpt1paper-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> with a ten-fold increase in both its parameter count and the size of its training dataset.<sup id="cite_ref-openai_5-1" class="reference"><a href="#cite_note-openai-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> It is a general-purpose learner and its ability to perform the various tasks was a consequence of its general ability to accurately predict the next item in a sequence,<sup id="cite_ref-gpt2paper_2-1" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-badpaper_7-0" class="reference"><a href="#cite_note-badpaper-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> which enabled it to <a href="/wiki/Machine_translation" title="Machine translation">translate</a> texts, <a href="/wiki/Question_answering" title="Question 
answering">answer questions</a> about a topic from a text, <a href="/wiki/Automatic_summarization" title="Automatic summarization">summarize</a> passages from a larger text,<sup id="cite_ref-badpaper_7-1" class="reference"><a href="#cite_note-badpaper-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> and <a href="/wiki/Natural_language_generation" title="Natural language generation">generate text output</a> on a level sometimes <a href="/wiki/Turing_test" title="Turing test">indistinguishable from that of humans</a>; however, it could become repetitive or nonsensical when generating long passages.<sup id="cite_ref-guardian_8-0" class="reference"><a href="#cite_note-guardian-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> It was superseded by the GPT-3 and GPT-4 models, which are no longer open source. </p><p>GPT-2 has, like its predecessor GPT-1 and its successors GPT-3 and GPT-4, a <a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">generative pre-trained transformer</a> architecture, implementing a <a href="/wiki/Deep_neural_network" class="mw-redirect" title="Deep neural network">deep neural network</a>, specifically a <a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">transformer</a> model,<sup id="cite_ref-gpt1paper_6-1" class="reference"><a href="#cite_note-gpt1paper-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> which uses <a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">attention</a> instead of older recurrence- and convolution-based architectures.<sup id="cite_ref-attention_9-0" class="reference"><a href="#cite_note-attention-9"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-attentionRNNs_10-0" class="reference"><a href="#cite_note-attentionRNNs-10"><span 
class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup> Attention mechanisms allow the model to selectively focus on segments of input text it predicts to be the most relevant.<sup id="cite_ref-jointly_11-0" class="reference"><a href="#cite_note-jointly-11"><span class="cite-bracket">[</span>11<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-effective_12-0" class="reference"><a href="#cite_note-effective-12"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> This model allows for greatly increased <a href="/wiki/Parallelization" class="mw-redirect" title="Parallelization">parallelization</a>, and outperforms previous benchmarks for RNN/CNN/LSTM-based models.<sup id="cite_ref-gpt1paper_6-2" class="reference"><a href="#cite_note-gpt1paper-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Training">Training</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=1" title="Edit section: Training"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Since the transformer architecture enabled <a href="/wiki/Massively_parallel" title="Massively parallel">massive parallelization</a>, GPT models could be trained on larger corpora than previous NLP (natural language processing) models. While the GPT-1 model demonstrated that the approach was viable, GPT-2 would further explore the emergent properties of networks trained on extremely large corpora. 
<i><a href="/wiki/CommonCrawl" class="mw-redirect" title="CommonCrawl">CommonCrawl</a></i>, a large corpus produced by <a href="/wiki/Web_crawling" class="mw-redirect" title="Web crawling">web crawling</a> and previously used in training NLP systems,<sup id="cite_ref-commoncrawl_13-0" class="reference"><a href="#cite_note-commoncrawl-13"><span class="cite-bracket">[</span>13<span class="cite-bracket">]</span></a></sup> was considered due to its large size, but was rejected after further review revealed large amounts of unintelligible content.<sup id="cite_ref-gpt2paper_2-2" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-commoncrawl_13-1" class="reference"><a href="#cite_note-commoncrawl-13"><span class="cite-bracket">[</span>13<span class="cite-bracket">]</span></a></sup> Instead, OpenAI developed a new corpus, known as <i><a href="/w/index.php?title=WebText&action=edit&redlink=1" class="new" title="WebText (page does not exist)">WebText</a></i>; rather than scraping content indiscriminately from the <a href="/wiki/World_Wide_Web" title="World Wide Web">World Wide Web</a>, WebText was generated by scraping only pages linked to by <a href="/wiki/Reddit" title="Reddit">Reddit</a> posts that had received at least three <a href="/wiki/Upvote" class="mw-redirect" title="Upvote">upvotes</a> prior to December 2017. 
The corpus was subsequently cleaned; <a href="/wiki/HTML" title="HTML">HTML</a> documents were parsed into plain text, duplicate pages were eliminated, and Wikipedia pages were removed (since their presence in many other datasets could have induced <a href="/wiki/Overfitting" title="Overfitting">overfitting</a>).<sup id="cite_ref-gpt2paper_2-3" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> </p><p>While the cost of training GPT-2 is known to have been $256 per hour,<sup id="cite_ref-register_14-0" class="reference"><a href="#cite_note-register-14"><span class="cite-bracket">[</span>14<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-staggering_15-0" class="reference"><a href="#cite_note-staggering-15"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup> the number of hours it took to complete training is unknown; therefore, the overall training cost cannot be estimated accurately.<sup id="cite_ref-vb2_16-0" class="reference"><a href="#cite_note-vb2-16"><span class="cite-bracket">[</span>16<span class="cite-bracket">]</span></a></sup> However, comparable large language models using transformer architectures have had their costs documented in more detail; the training processes for <a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a> and <a href="/wiki/XLNet" title="XLNet">XLNet</a> consumed, respectively, $6,912 and $245,000 of resources.<sup id="cite_ref-staggering_15-1" class="reference"><a href="#cite_note-staggering-15"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Release">Release</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=2" title="Edit section: Release"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> 
<p>GPT-2 was first announced on February 14, 2019. A February 2019 article in <i><a href="/wiki/The_Verge" title="The Verge">The Verge</a></i> by James Vincent said that, while "[the] writing it produces is usually easily identifiable as non-human", it remained "one of the most exciting examples yet" of language generation programs:<sup id="cite_ref-verge1_17-0" class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> </p> <blockquote><p>Give it a fake headline, and it’ll write the rest of the article, complete with fake quotations and statistics. Feed it the first line of a short story, and it’ll tell you what happens to your character next. It can even write fan fiction, given the right prompt.<sup id="cite_ref-verge1_17-1" class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup></p></blockquote> <p><i><a href="/wiki/The_Guardian" title="The Guardian">The Guardian</a></i> described this output as "plausible newspaper prose";<sup id="cite_ref-guardian_8-1" class="reference"><a href="#cite_note-guardian-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> <a href="/wiki/Kelsey_Piper" title="Kelsey Piper">Kelsey Piper</a> of <i><a href="/wiki/Vox_Media" title="Vox Media">Vox</a></i> said "one of the coolest AI systems I’ve ever seen may also be the one that will kick me out of my job".<sup id="cite_ref-voxxy_18-0" class="reference"><a href="#cite_note-voxxy-18"><span class="cite-bracket">[</span>18<span class="cite-bracket">]</span></a></sup> GPT-2's flexibility was described as "impressive" by <i><a href="/wiki/The_Verge" title="The Verge">The Verge</a></i>; specifically, its ability to <a href="/wiki/Machine_translation" title="Machine translation">translate text</a> between languages, summarize long articles, and answer trivia questions was noted.<sup id="cite_ref-verge1_17-2" 
class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> </p><p>A study by the <a href="/wiki/University_of_Amsterdam" title="University of Amsterdam">University of Amsterdam</a> employing a modified <a href="/wiki/Turing_test" title="Turing test">Turing test</a> found that at least in some scenarios, participants were unable to distinguish poems generated by GPT-2 from those written by humans.<sup id="cite_ref-19" class="reference"><a href="#cite_note-19"><span class="cite-bracket">[</span>19<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Restrictions_and_partial_release">Restrictions and partial release</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=3" title="Edit section: Restrictions and partial release"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:GPT-2-ProSkub-AntiSkub.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/e/e5/GPT-2-ProSkub-AntiSkub.png/220px-GPT-2-ProSkub-AntiSkub.png" decoding="async" width="220" height="116" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/e/e5/GPT-2-ProSkub-AntiSkub.png/330px-GPT-2-ProSkub-AntiSkub.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/e/e5/GPT-2-ProSkub-AntiSkub.png/440px-GPT-2-ProSkub-AntiSkub.png 2x" data-file-width="3829" data-file-height="2016" /></a><figcaption>While "Skub" is not a real product, even the reduced-size model used in DistilGPT2 is capable of creating plausible arguments both for and against it.</figcaption></figure> <p>While previous OpenAI models had been made immediately available to the public, OpenAI initially refused to make a public release of GPT-2's source code when announcing it in February, citing 
the risk of malicious use;<sup id="cite_ref-guardian_8-2" class="reference"><a href="#cite_note-guardian-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> limited access to the model (i.e. an interface that allowed input and provided output, not the source code itself) was allowed for selected press outlets on announcement.<sup id="cite_ref-guardian_8-3" class="reference"><a href="#cite_note-guardian-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> One commonly-cited justification was that, since generated text was usually completely novel, it could be used by <a href="/wiki/Spamming" title="Spamming">spammers</a> to evade automated <a href="/wiki/Spam_filter" class="mw-redirect" title="Spam filter">filters</a>; OpenAI demonstrated a version of GPT-2 fine-tuned to "generate infinite positive – or negative – reviews of products".<sup id="cite_ref-guardian_8-4" class="reference"><a href="#cite_note-guardian-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> </p><p>Another justification was that GPT-2 could be used to generate text that was <a href="/wiki/Obscene" class="mw-redirect" title="Obscene">obscene</a> or <a href="/wiki/Racist" class="mw-redirect" title="Racist">racist</a>. 
Researchers such as Jeremy Howard warned of "the technology to totally fill Twitter, email, and the web up with reasonable-sounding, context-appropriate prose, which would drown out all other speech and be impossible to filter".<sup id="cite_ref-verge1_17-3" class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> The <a href="/wiki/Allen_Institute_for_Artificial_Intelligence" class="mw-redirect" title="Allen Institute for Artificial Intelligence">Allen Institute for Artificial Intelligence</a>, in response to GPT-2, announced a tool to detect "neural fake news".<sup id="cite_ref-neuralfakesnooze_20-0" class="reference"><a href="#cite_note-neuralfakesnooze-20"><span class="cite-bracket">[</span>20<span class="cite-bracket">]</span></a></sup> </p><p>However, opinion was divided. A February 2019 article in <i>The Verge</i> argued that the threat posed by GPT-2 had been exaggerated;<sup id="cite_ref-ethics_21-0" class="reference"><a href="#cite_note-ethics-21"><span class="cite-bracket">[</span>21<span class="cite-bracket">]</span></a></sup> <a href="/wiki/Anima_Anandkumar" title="Anima Anandkumar">Anima Anandkumar</a>, a professor at <a href="/wiki/Caltech" class="mw-redirect" title="Caltech">Caltech</a> and director of machine learning research at <a href="/wiki/Nvidia" title="Nvidia">Nvidia</a>, said that there was no evidence that GPT-2 had the capabilities to pose the threats described by OpenAI, and that what they did was the "opposite of open", characterizing their refusal to release the full model as "malicious <a href="/wiki/Bullshit" title="Bullshit">BS</a>".<sup id="cite_ref-ethics_21-1" class="reference"><a href="#cite_note-ethics-21"><span class="cite-bracket">[</span>21<span class="cite-bracket">]</span></a></sup> <i>The Gradient</i> published an open letter to OpenAI requesting that they release the model publicly, comparing the threat posed by text-generation AI to the threat 
posed by the <a href="/wiki/Printing_press" title="Printing press">printing press</a>, and giving <a href="/wiki/Photoshop" class="mw-redirect" title="Photoshop">Photoshop</a> as an example of "a technology that has (thankfully) not destroyed modern society despite its potential for chaos":<sup id="cite_ref-pls_22-0" class="reference"><a href="#cite_note-pls-22"><span class="cite-bracket">[</span>22<span class="cite-bracket">]</span></a></sup> </p> <blockquote><p>Thirty years later, society has emerged relatively unscathed despite Photoshop being simple enough for high school students to use and ubiquitous enough to commandeer its own verb. Why? Precisely because everyone knows about Photoshop.<sup id="cite_ref-pls_22-1" class="reference"><a href="#cite_note-pls-22"><span class="cite-bracket">[</span>22<span class="cite-bracket">]</span></a></sup></p></blockquote> <div class="mw-heading mw-heading3"><h3 id="774M_release">774M release</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=4" title="Edit section: 774M release"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>While OpenAI did not release the fully-trained model or the corpora it was trained on, description of their methods in prior publications (and the free availability of underlying technology) made it possible for GPT-2 to be replicated by others as <a href="/wiki/Free_software" title="Free software">free software</a>; one such replication, OpenGPT-2, was released in August 2019, in conjunction with a freely licensed version of WebText called OpenWebText. 
The cloud compute costs for OpenGPT-2 were given as approximately $50,000.<sup id="cite_ref-opengpt2_23-0" class="reference"><a href="#cite_note-opengpt2-23"><span class="cite-bracket">[</span>23<span class="cite-bracket">]</span></a></sup> </p><p>On August 20, 2019, OpenAI released a partial version of GPT-2, with 774 million parameters (roughly half the size of the full 1.5 billion parameter model).<sup id="cite_ref-vb_24-0" class="reference"><a href="#cite_note-vb-24"><span class="cite-bracket">[</span>24<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Full_1.5B_release">Full 1.5B release</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=5" title="Edit section: Full 1.5B release"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Initial concerns that GPT-2 would lend itself to widespread misuse did not come to pass; <i>The Verge</i> said that "there are reasons to be skeptical about claims that AI technology will usher in some sort of ‘infopocalypse.’ For a start, we already have programs that can generate plausible text at high volume for little cost: humans."<sup id="cite_ref-reddit_25-0" class="reference"><a href="#cite_note-reddit-25"><span class="cite-bracket">[</span>25<span class="cite-bracket">]</span></a></sup> By November 2019, OpenAI said that they had "seen no strong evidence of misuse so far", and the full version, with 1.5 billion parameters trained with forty gigabytes of data, "about eight thousand times larger than the collected works of Shakespeare",<sup id="cite_ref-Murati_26-0" class="reference"><a href="#cite_note-Murati-26"><span class="cite-bracket">[</span>26<span class="cite-bracket">]</span></a></sup> was released on November 5, 2019.<sup id="cite_ref-verge2_3-1" class="reference"><a href="#cite_note-verge2-3"><span class="cite-bracket">[</span>3<span 
class="cite-bracket">]</span></a></sup><sup id="cite_ref-15Brelease_4-1" class="reference"><a href="#cite_note-15Brelease-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Small_and_Medium_Releases">Small and Medium Releases</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=6" title="Edit section: Small and Medium Releases"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Two other smaller releases of GPT-2 are available, including the small version of 124M parameters and the medium size of 355M parameters. Both are available to download from Hugging Face.<sup id="cite_ref-27" class="reference"><a href="#cite_note-27"><span class="cite-bracket">[</span>27<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-28" class="reference"><a href="#cite_note-28"><span class="cite-bracket">[</span>28<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Limitations">Limitations</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=7" title="Edit section: Limitations"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:GPT-2-Trump_Asuka.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/2/25/GPT-2-Trump_Asuka.png/220px-GPT-2-Trump_Asuka.png" decoding="async" width="220" height="232" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/2/25/GPT-2-Trump_Asuka.png/330px-GPT-2-Trump_Asuka.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/2/25/GPT-2-Trump_Asuka.png/440px-GPT-2-Trump_Asuka.png 2x" data-file-width="1914" data-file-height="2015" /></a><figcaption>GPT-2 can generate 
thematically appropriate text for a range of scenarios, even surreal ones like a <a href="/wiki/CNN" title="CNN">CNN</a> article about <a href="/wiki/Donald_Trump" title="Donald Trump">Donald Trump</a> giving a speech praising the anime character <a href="/wiki/Asuka_Langley_Soryu" title="Asuka Langley Soryu">Asuka Langley Soryu</a>. Here, the tendency to generate nonsensical and repetitive text with increasing output length (even in the full 1.5B model) can be seen; in the second paragraph, grammar begins to deteriorate, and the output eventually becomes one incoherent sentence repeated over and over.</figcaption></figure> <p>While GPT-2's ability to generate plausible passages of natural language text was generally remarked on positively, its shortcomings were noted as well, especially when generating texts longer than a couple of paragraphs; <i>Vox</i> said "the prose is pretty rough, there’s the occasional non-sequitur, and the articles get less coherent the longer they get".<sup id="cite_ref-voxxy_18-1" class="reference"><a href="#cite_note-voxxy-18"><span class="cite-bracket">[</span>18<span class="cite-bracket">]</span></a></sup> <i>The Verge</i> similarly noted that longer samples of GPT-2 writing tended to "stray off topic" and lack overall coherence;<sup id="cite_ref-verge1_17-4" class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> <i><a href="/wiki/The_Register" title="The Register">The Register</a></i> opined that "a human reading it should, after a short while, realize something's up", and noted that "GPT-2 doesn't answer questions as well as other systems that rely on algorithms to extract and retrieve information."<sup id="cite_ref-register_14-1" class="reference"><a href="#cite_note-register-14"><span class="cite-bracket">[</span>14<span class="cite-bracket">]</span></a></sup> </p><p>GPT-2 deployment is resource-intensive; the full version of the model is larger than five 
gigabytes, making it difficult to embed locally into applications, and consumes large amounts of RAM. In addition, performing a single prediction "can occupy a CPU at 100% utilization for several minutes", and even with <a href="/wiki/GPU" class="mw-redirect" title="GPU">GPU</a> processing, "a single prediction can take seconds". To alleviate these issues, the company <a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a> created <b>DistilGPT2</b>, using <a href="/wiki/Knowledge_distillation" title="Knowledge distillation">knowledge distillation</a> to produce a smaller model that "scores a few points lower on some quality benchmarks", but is "33% smaller and twice as fast".<sup class="noprint Inline-Template Template-Fact" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources. (June 2024)">citation needed</span></a></i>]</sup> </p> <div class="mw-heading mw-heading2"><h2 id="Application_and_subsequent_research">Application and subsequent research</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=8" title="Edit section: Application and subsequent research"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Even before the release of the full version, GPT-2 was used for a variety of applications and services, as well as for entertainment. 
In June 2019, a <a href="/wiki/Subreddit" class="mw-redirect" title="Subreddit">subreddit</a> named r/SubSimulatorGPT2 was created in which a variety of GPT-2 instances trained on different subreddits made posts and replied to each other's comments, creating a situation where one could observe "an AI personification of r/Bitcoin argue with the machine learning-derived spirit of r/ShittyFoodPorn";<sup id="cite_ref-reddit_25-1" class="reference"><a href="#cite_note-reddit-25"><span class="cite-bracket">[</span>25<span class="cite-bracket">]</span></a></sup> by July of that year, a GPT-2-based software program released to <a href="/wiki/Autocomplete" title="Autocomplete">autocomplete</a> lines of code in a variety of programming languages was described by users as a "game-changer".<sup id="cite_ref-smartcompose_29-0" class="reference"><a href="#cite_note-smartcompose-29"><span class="cite-bracket">[</span>29<span class="cite-bracket">]</span></a></sup> </p><p>In 2019, <a href="/wiki/AI_Dungeon" title="AI Dungeon">AI Dungeon</a> was launched, which used GPT-2 to generate dynamic <a href="/wiki/Text_adventures" class="mw-redirect" title="Text adventures">text adventures</a> based on user input.<sup id="cite_ref-aid2_30-0" class="reference"><a href="#cite_note-aid2-30"><span class="cite-bracket">[</span>30<span class="cite-bracket">]</span></a></sup> AI Dungeon now offers access to the largest release of the <a href="/wiki/GPT-3" title="GPT-3">GPT-3</a> API as an optional paid upgrade; the free version of the site uses the second-largest release of GPT-3.<sup id="cite_ref-aidungeon_31-0" class="reference"><a href="#cite_note-aidungeon-31"><span class="cite-bracket">[</span>31<span class="cite-bracket">]</span></a></sup> Latitude, the company formed around AI Dungeon, raised $3.3 million in <a href="/wiki/Seed_funding" class="mw-redirect" title="Seed funding">seed funding</a> in 2021.<sup id="cite_ref-tclat_32-0" class="reference"><a href="#cite_note-tclat-32"><span 
class="cite-bracket">[</span>32<span class="cite-bracket">]</span></a></sup> Several websites host interactive demonstrations of different instances of GPT-2 and other transformer models.<sup id="cite_ref-33" class="reference"><a href="#cite_note-33"><span class="cite-bracket">[</span>33<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-34" class="reference"><a href="#cite_note-34"><span class="cite-bracket">[</span>34<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-35" class="reference"><a href="#cite_note-35"><span class="cite-bracket">[</span>35<span class="cite-bracket">]</span></a></sup> </p><p>In February 2021, a crisis center for troubled teens announced that they would begin using a GPT-2-derived chatbot to help train counselors by allowing them to have conversations with simulated teens (this use was purely for internal purposes, and did not involve having GPT-2 communicate with the teens themselves).<sup id="cite_ref-teens_36-0" class="reference"><a href="#cite_note-teens-36"><span class="cite-bracket">[</span>36<span class="cite-bracket">]</span></a></sup> </p><p>On May 9, 2023, OpenAI released a mapped version of GPT-2. 
OpenAI used a successor model, <a href="/wiki/GPT-4" title="GPT-4">GPT-4</a>, to map each neuron of GPT-2 and determine its function.<sup id="cite_ref-37" class="reference"><a href="#cite_note-37"><span class="cite-bracket">[</span>37<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Performance_and_evaluation">Performance and evaluation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=9" title="Edit section: Performance and evaluation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:GPT-2-PresidentSnowden.PNG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/2/28/GPT-2-PresidentSnowden.PNG/220px-GPT-2-PresidentSnowden.PNG" decoding="async" width="220" height="109" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/2/28/GPT-2-PresidentSnowden.PNG/330px-GPT-2-PresidentSnowden.PNG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/2/28/GPT-2-PresidentSnowden.PNG/440px-GPT-2-PresidentSnowden.PNG 2x" data-file-width="1899" data-file-height="944" /></a><figcaption>GPT-2 writing a fictional news article about <a href="/wiki/Edward_Snowden" title="Edward Snowden">Edward Snowden</a>'s actions after winning the <a href="/wiki/2020_United_States_presidential_election" title="2020 United States presidential election">2020 United States presidential election</a> (all highlighted text is machine-generated). 
While Snowden had (at the time of generation) never been elected to public office, the generated sample is grammatically and stylistically valid.</figcaption></figure> <p>GPT-2 became capable of performing a variety of tasks beyond simple text production due to the breadth of its dataset and technique: answering questions, summarizing, and even <a href="/wiki/Machine_translation" title="Machine translation">translating</a> between languages in a variety of <a href="/wiki/Domain-specific" class="mw-redirect" title="Domain-specific">specific domains</a>, without being instructed in anything beyond how to predict the next word in a sequence.<sup id="cite_ref-verge1_17-5" class="reference"><a href="#cite_note-verge1-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-voxxy_18-2" class="reference"><a href="#cite_note-voxxy-18"><span class="cite-bracket">[</span>18<span class="cite-bracket">]</span></a></sup> </p><p>One example of generalized learning is GPT-2's ability to perform machine translation between French and English, for which task GPT-2's performance was assessed using WMT-14 translation tasks. GPT-2's training corpus included virtually no French text; non-English text was deliberately removed while cleaning the dataset prior to training, and as a consequence, only 10MB of French of the remaining 40,000MB was available for the model to learn from (mostly from foreign-language quotations in English posts and articles).<sup id="cite_ref-gpt2paper_2-4" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> </p><p>Despite this, GPT-2 achieved 5 BLEU on the WMT-14 English-to-French test set (slightly below the score of a translation via word-for-word substitution). It was also able to outperform several contemporary (2017) unsupervised machine translation baselines on the French-to-English test set, where GPT-2 achieved 11.5 BLEU. 
This remained below the highest-performing contemporary unsupervised approach (2019), which had achieved 33.5 BLEU.<sup id="cite_ref-gpt2paper_2-5" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> However, other models used large amounts of French text to achieve these results; GPT-2 was estimated to have used a monolingual French corpus approximately 1/500 the size of comparable approaches.<sup id="cite_ref-gpt2paper_2-6" class="reference"><a href="#cite_note-gpt2paper-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> </p> <table class="wikitable"> <caption> </caption> <tbody><tr> <th> </th> <th>architecture </th> <th>parameter count </th> <th>training data </th></tr> <tr> <td><a href="/wiki/GPT-1" title="GPT-1">GPT-1</a> </td> <td>12-level, 12-headed Transformer decoder (no encoder), followed by linear-softmax. </td> <td>0.12 billion </td> <td><a href="/wiki/BookCorpus" title="BookCorpus">BookCorpus</a>:<sup id="cite_ref-38" class="reference"><a href="#cite_note-38"><span class="cite-bracket">[</span>38<span class="cite-bracket">]</span></a></sup> 4.5 GB of text, from 7000 unpublished books of various genres. </td></tr> <tr> <td>GPT-2 </td> <td>GPT-1, but with modified normalization </td> <td>1.5 billion </td> <td>WebText: 40 GB<sup id="cite_ref-Murati_26-1" class="reference"><a href="#cite_note-Murati-26"><span class="cite-bracket">[</span>26<span class="cite-bracket">]</span></a></sup> of text, 8 million documents, from 45 million webpages upvoted on Reddit. </td></tr> <tr> <td><a href="/wiki/GPT-3" title="GPT-3">GPT-3</a> </td> <td>GPT-2, but with modification to allow larger scaling. </td> <td>175 billion </td> <td>570 GB plaintext, 300 billion tokens of CommonCrawl, WebText, English Wikipedia, and two books corpora (Books1 and Books2). 
</td></tr></tbody></table> <p>GPT-2 was to be followed by the 175-billion-parameter <a href="/wiki/GPT-3" title="GPT-3">GPT-3</a>,<sup id="cite_ref-gpt3paper_39-0" class="reference"><a href="#cite_note-gpt3paper-39"><span class="cite-bracket">[</span>39<span class="cite-bracket">]</span></a></sup> revealed to the public in 2020<sup id="cite_ref-Arram_20200709_40-0" class="reference"><a href="#cite_note-Arram_20200709-40"><span class="cite-bracket">[</span>40<span class="cite-bracket">]</span></a></sup> (whose source code has never been made available). Access to GPT-3 is provided exclusively through <a href="/wiki/Application_programming_interface" class="mw-redirect" title="Application programming interface">APIs</a> offered by OpenAI and <a href="/wiki/Microsoft" title="Microsoft">Microsoft</a>.<sup id="cite_ref-GPT3microsoft_41-0" class="reference"><a href="#cite_note-GPT3microsoft-41"><span class="cite-bracket">[</span>41<span class="cite-bracket">]</span></a></sup> GPT-3 was in turn followed by <a href="/wiki/GPT-4" title="GPT-4">GPT-4</a>. 
</p> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=GPT-2&action=edit&section=10" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output 
.cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://github.com/openai/gpt-2">"gpt-2"</a>. <i>GitHub</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230311154936/https://github.com/openai/gpt-2">Archived</a> from the original on 11 March 2023<span class="reference-accessdate">. 
Retrieved <span class="nowrap">13 March</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=GitHub&rft.atitle=gpt-2&rft_id=https%3A%2F%2Fgithub.com%2Fopenai%2Fgpt-2&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-gpt2paper-2"><span class="mw-cite-backlink">^ <a href="#cite_ref-gpt2paper_2-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-5"><sup><i><b>f</b></i></sup></a> <a href="#cite_ref-gpt2paper_2-6"><sup><i><b>g</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFRadfordWuChildLuan2019" class="citation journal cs1">Radford, Alec; Wu, Jeffrey; Child, Rewon; Luan, David; Amodei, Dario; Sutskever, Ilya (14 February 2019). <a rel="nofollow" class="external text" href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">"Language models are unsupervised multitask learners"</a> <span class="cs1-format">(PDF)</span>. <i>OpenAI</i>. <b>1</b> (8). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210206183945/https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">Archived</a> <span class="cs1-format">(PDF)</span> from the original on 6 February 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=OpenAI&rft.atitle=Language+models+are+unsupervised+multitask+learners&rft.volume=1&rft.issue=8&rft.date=2019-02-14&rft.aulast=Radford&rft.aufirst=Alec&rft.au=Wu%2C+Jeffrey&rft.au=Child%2C+Rewon&rft.au=Luan%2C+David&rft.au=Amodei%2C+Dario&rft.au=Sutskever%2C+Ilya&rft_id=https%3A%2F%2Fcdn.openai.com%2Fbetter-language-models%2Flanguage_models_are_unsupervised_multitask_learners.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-verge2-3"><span class="mw-cite-backlink">^ <a href="#cite_ref-verge2_3-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-verge2_3-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVincent2019" class="citation web cs1">Vincent, James (7 November 2019). <a rel="nofollow" class="external text" href="https://www.theverge.com/2019/11/7/20953040/openai-text-generation-ai-gpt-2-full-model-release-1-5b-parameters">"OpenAI has published the text-generating AI it said was too dangerous to share"</a>. <i><a href="/wiki/The_Verge" title="The Verge">The Verge</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20200611054114/https://www.theverge.com/2019/11/7/20953040/openai-text-generation-ai-gpt-2-full-model-release-1-5b-parameters">Archived</a> from the original on 11 June 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Verge&rft.atitle=OpenAI+has+published+the+text-generating+AI+it+said+was+too+dangerous+to+share&rft.date=2019-11-07&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2019%2F11%2F7%2F20953040%2Fopenai-text-generation-ai-gpt-2-full-model-release-1-5b-parameters&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-15Brelease-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-15Brelease_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-15Brelease_4-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://openai.com/blog/gpt-2-1-5b-release/">"GPT-2: 1.5B Release"</a>. <i>OpenAI</i>. 2019-11-05. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191114074358/https://openai.com/blog/gpt-2-1-5b-release/">Archived</a> from the original on 2019-11-14<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2019-11-14</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=OpenAI&rft.atitle=GPT-2%3A+1.5B+Release&rft.date=2019-11-05&rft_id=https%3A%2F%2Fopenai.com%2Fblog%2Fgpt-2-1-5b-release%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-openai-5"><span class="mw-cite-backlink">^ <a href="#cite_ref-openai_5-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-openai_5-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://openai.com/blog/better-language-models/">"Better Language Models and Their Implications"</a>. <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a>. 14 February 2019. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201219132206/https://openai.com/blog/better-language-models/">Archived</a> from the original on 19 December 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Better+Language+Models+and+Their+Implications&rft.pub=OpenAI&rft.date=2019-02-14&rft_id=https%3A%2F%2Fopenai.com%2Fblog%2Fbetter-language-models%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-gpt1paper-6"><span class="mw-cite-backlink">^ <a href="#cite_ref-gpt1paper_6-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-gpt1paper_6-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-gpt1paper_6-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFRadfordNarasimhanSalimansSutskever2018" class="citation web cs1">Radford, Alec; Narasimhan, Karthik; Salimans, Tim; Sutskever, Ilya (11 June 2018). <a rel="nofollow" class="external text" href="https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf">"Improving Language Understanding by Generative Pre-Training"</a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a>. p. 12. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210126024542/https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf">Archived</a> <span class="cs1-format">(PDF)</span> from the original on 26 January 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">23 January</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Improving+Language+Understanding+by+Generative+Pre-Training&rft.pages=12&rft.pub=OpenAI&rft.date=2018-06-11&rft.aulast=Radford&rft.aufirst=Alec&rft.au=Narasimhan%2C+Karthik&rft.au=Salimans%2C+Tim&rft.au=Sutskever%2C+Ilya&rft_id=https%3A%2F%2Fcdn.openai.com%2Fresearch-covers%2Flanguage-unsupervised%2Flanguage_understanding_paper.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-badpaper-7"><span class="mw-cite-backlink">^ <a href="#cite_ref-badpaper_7-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-badpaper_7-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHegdePatil2020" class="citation arxiv cs1">Hegde, Chaitra; Patil, Shrikumar (9 June 2020). "Unsupervised Paraphrase Generation using Pre-trained Language Models". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2006.05477">2006.05477</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Unsupervised+Paraphrase+Generation+using+Pre-trained+Language+Models&rft.date=2020-06-09&rft_id=info%3Aarxiv%2F2006.05477&rft.aulast=Hegde&rft.aufirst=Chaitra&rft.au=Patil%2C+Shrikumar&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-guardian-8"><span class="mw-cite-backlink">^ <a href="#cite_ref-guardian_8-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-guardian_8-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-guardian_8-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-guardian_8-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-guardian_8-4"><sup><i><b>e</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHern2019" class="citation web cs1">Hern, Alex (14 February 2019). <a rel="nofollow" class="external text" href="https://www.theguardian.com/technology/2019/feb/14/elon-musk-backed-ai-writes-convincing-news-fiction">"New AI fake text generator may be too dangerous to release, say creators"</a>. <i><a href="/wiki/The_Guardian" title="The Guardian">The Guardian</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20190214173112/https://www.theguardian.com/technology/2019/feb/14/elon-musk-backed-ai-writes-convincing-news-fiction">Archived</a> from the original on 14 February 2019<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Guardian&rft.atitle=New+AI+fake+text+generator+may+be+too+dangerous+to+release%2C+say+creators&rft.date=2019-02-14&rft.aulast=Hern&rft.aufirst=Alex&rft_id=https%3A%2F%2Fwww.theguardian.com%2Ftechnology%2F2019%2Ffeb%2F14%2Felon-musk-backed-ai-writes-convincing-news-fiction&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-attention-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-attention_9-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVaswaniShazeerParmarUszkoreit2017" class="citation journal cs1"><a href="/wiki/Ashish_Vaswani" title="Ashish Vaswani">Vaswani, Ashish</a>; Shazeer, Noam; Parmar, Niki; Uszkoreit, Jakob; Jones, Llion; <a href="/wiki/Aidan_Gomez" title="Aidan Gomez">Gomez, Aidan N</a>; Kaiser, Łukasz; Polosukhin, Illia (2017). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf">"Attention is All you Need"</a> <span class="cs1-format">(PDF)</span>. <i>Advances in Neural Information Processing Systems</i>. <b>30</b>. 
Curran Associates, Inc.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=Attention+is+All+you+Need&rft.volume=30&rft.date=2017&rft.aulast=Vaswani&rft.aufirst=Ashish&rft.au=Shazeer%2C+Noam&rft.au=Parmar%2C+Niki&rft.au=Uszkoreit%2C+Jakob&rft.au=Jones%2C+Llion&rft.au=Gomez%2C+Aidan+N&rft.au=Kaiser%2C+%C5%81ukasz&rft.au=Polosukhin%2C+Illia&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper%2F2017%2Ffile%2F3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-attentionRNNs-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-attentionRNNs_10-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFOlahCarter2016" class="citation journal cs1">Olah, Chris; Carter, Shan (8 September 2016). <a rel="nofollow" class="external text" href="https://distill.pub/2016/augmented-rnns/">"Attention and Augmented Recurrent Neural Networks"</a>. <i>Distill</i>. <b>1</b> (9). <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.23915%2Fdistill.00001">10.23915/distill.00001</a></span>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201222104024/https://distill.pub/2016/augmented-rnns/">Archived</a> from the original on 22 December 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">22 January</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Distill&rft.atitle=Attention+and+Augmented+Recurrent+Neural+Networks&rft.volume=1&rft.issue=9&rft.date=2016-09-08&rft_id=info%3Adoi%2F10.23915%2Fdistill.00001&rft.aulast=Olah&rft.aufirst=Chris&rft.au=Carter%2C+Shan&rft_id=https%3A%2F%2Fdistill.pub%2F2016%2Faugmented-rnns%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-jointly-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-jointly_11-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBahdanauChoBengio2014" class="citation arxiv cs1">Bahdanau, Dzmitry; Cho, Kyunghyun; Bengio, Yoshua (1 September 2014). "Neural Machine Translation by Jointly Learning to Align and Translate". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1409.0473">1409.0473</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Neural+Machine+Translation+by+Jointly+Learning+to+Align+and+Translate&rft.date=2014-09-01&rft_id=info%3Aarxiv%2F1409.0473&rft.aulast=Bahdanau&rft.aufirst=Dzmitry&rft.au=Cho%2C+Kyunghyun&rft.au=Bengio%2C+Yoshua&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-effective-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-effective_12-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite 
id="CITEREFLuongPhamManning2015" class="citation arxiv cs1">Luong, Minh-Thang; Pham, Hieu; Manning, Christopher D. (17 August 2015). "Effective Approaches to Attention-based Neural Machine Translation". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1508.04025">1508.04025</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Effective+Approaches+to+Attention-based+Neural+Machine+Translation&rft.date=2015-08-17&rft_id=info%3Aarxiv%2F1508.04025&rft.aulast=Luong&rft.aufirst=Minh-Thang&rft.au=Pham%2C+Hieu&rft.au=Manning%2C+Christopher+D.&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-commoncrawl-13"><span class="mw-cite-backlink">^ <a href="#cite_ref-commoncrawl_13-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-commoncrawl_13-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFTrinhLe2018" class="citation arxiv cs1">Trinh, Trieu H.; Le, Quoc V. (7 June 2018). "A Simple Method for Commonsense Reasoning". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1806.02847">1806.02847</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=A+Simple+Method+for+Commonsense+Reasoning&rft.date=2018-06-07&rft_id=info%3Aarxiv%2F1806.02847&rft.aulast=Trinh&rft.aufirst=Trieu+H.&rft.au=Le%2C+Quoc+V.&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-register-14"><span class="mw-cite-backlink">^ <a href="#cite_ref-register_14-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-register_14-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFQuach2019" class="citation web cs1">Quach, Katyanna (14 February 2019). <a rel="nofollow" class="external text" href="https://www.theregister.com/2019/02/14/open_ai_language_bot/">"Roses are red, this is sublime: We fed OpenAI's latest chat bot a classic Reg headline"</a>. <i>The Register</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210309093207/https://www.theregister.com/2019/02/14/open_ai_language_bot/">Archived</a> from the original on 9 March 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Register&rft.atitle=Roses+are+red%2C+this+is+sublime%3A+We+fed+OpenAI%27s+latest+chat+bot+a+classic+Reg+headline&rft.date=2019-02-14&rft.aulast=Quach&rft.aufirst=Katyanna&rft_id=https%3A%2F%2Fwww.theregister.com%2F2019%2F02%2F14%2Fopen_ai_language_bot%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-staggering-15"><span class="mw-cite-backlink">^ <a href="#cite_ref-staggering_15-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-staggering_15-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://syncedreview.com/2019/06/27/the-staggering-cost-of-training-sota-ai-models/">"The Staggering Cost of Training SOTA AI Models"</a>. <i>Synced</i>. 27 June 2019. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201124204913/https://syncedreview.com/2019/06/27/the-staggering-cost-of-training-sota-ai-models/">Archived</a> from the original on 24 November 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Synced&rft.atitle=The+Staggering+Cost+of+Training+SOTA+AI+Models&rft.date=2019-06-27&rft_id=https%3A%2F%2Fsyncedreview.com%2F2019%2F06%2F27%2Fthe-staggering-cost-of-training-sota-ai-models%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-vb2-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-vb2_16-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWiggers2020" class="citation web cs1">Wiggers, Kyle (23 March 2020). <a rel="nofollow" class="external text" href="https://venturebeat.com/2020/03/23/google-open-sources-framework-that-reduces-ai-training-costs-by-up-to-80/">"Google open-sources framework that reduces AI training costs by up to 80%"</a>. <i>VentureBeat</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201126044004/https://venturebeat.com/2020/03/23/google-open-sources-framework-that-reduces-ai-training-costs-by-up-to-80/">Archived</a> from the original on 26 November 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=VentureBeat&rft.atitle=Google+open-sources+framework+that+reduces+AI+training+costs+by+up+to+80%25&rft.date=2020-03-23&rft.aulast=Wiggers&rft.aufirst=Kyle&rft_id=https%3A%2F%2Fventurebeat.com%2F2020%2F03%2F23%2Fgoogle-open-sources-framework-that-reduces-ai-training-costs-by-up-to-80%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-verge1-17"><span class="mw-cite-backlink">^ <a href="#cite_ref-verge1_17-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-verge1_17-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-verge1_17-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-verge1_17-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-verge1_17-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-verge1_17-5"><sup><i><b>f</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVincent2019" class="citation web cs1">Vincent, James (14 February 2019). <a rel="nofollow" class="external text" href="https://www.theverge.com/2019/2/14/18224704/ai-machine-learning-language-models-read-write-openai-gpt2">"OpenAI's new multitalented AI writes, translates, and slanders"</a>. <i><a href="/wiki/The_Verge" title="The Verge">The Verge</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201218091707/https://www.theverge.com/2019/2/14/18224704/ai-machine-learning-language-models-read-write-openai-gpt2">Archived</a> from the original on 18 December 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Verge&rft.atitle=OpenAI%27s+new+multitalented+AI+writes%2C+translates%2C+and+slanders&rft.date=2019-02-14&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2019%2F2%2F14%2F18224704%2Fai-machine-learning-language-models-read-write-openai-gpt2&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-voxxy-18"><span class="mw-cite-backlink">^ <a href="#cite_ref-voxxy_18-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-voxxy_18-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-voxxy_18-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFPiper2019" class="citation web cs1">Piper, Kelsey (14 February 2019). <a rel="nofollow" class="external text" href="https://www.vox.com/future-perfect/2019/2/14/18222270/artificial-intelligence-open-ai-natural-language-processing">"An AI helped us write this article"</a>. <i><a href="/wiki/Vox_Media" title="Vox Media">Vox</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201108002620/https://www.vox.com/future-perfect/2019/2/14/18222270/artificial-intelligence-open-ai-natural-language-processing">Archived</a> from the original on 8 November 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Vox&rft.atitle=An+AI+helped+us+write+this+article&rft.date=2019-02-14&rft.aulast=Piper&rft.aufirst=Kelsey&rft_id=https%3A%2F%2Fwww.vox.com%2Ffuture-perfect%2F2019%2F2%2F14%2F18222270%2Fartificial-intelligence-open-ai-natural-language-processing&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-19">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKöbisMossink2021" class="citation journal cs1">Köbis, Nils; Mossink, Luca D. (1 January 2021). <a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.chb.2020.106553">"Artificial intelligence versus Maya Angelou: Experimental evidence that people cannot differentiate AI-generated from human-written poetry"</a>. <i>Computers in Human Behavior</i>. <b>114</b>: 106553. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.chb.2020.106553">10.1016/j.chb.2020.106553</a></span>. 
<a href="/wiki/Hdl_(identifier)" class="mw-redirect" title="Hdl (identifier)">hdl</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://hdl.handle.net/21.11116%2F0000-0007-13E5-1">21.11116/0000-0007-13E5-1</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Computers+in+Human+Behavior&rft.atitle=Artificial+intelligence+versus+Maya+Angelou%3A+Experimental+evidence+that+people+cannot+differentiate+AI-generated+from+human-written+poetry&rft.volume=114&rft.pages=106553&rft.date=2021-01-01&rft_id=info%3Ahdl%2F21.11116%2F0000-0007-13E5-1&rft_id=info%3Adoi%2F10.1016%2Fj.chb.2020.106553&rft.aulast=K%C3%B6bis&rft.aufirst=Nils&rft.au=Mossink%2C+Luca+D.&rft_id=https%3A%2F%2Fdoi.org%2F10.1016%252Fj.chb.2020.106553&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-neuralfakesnooze-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-neuralfakesnooze_20-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchwartz2019" class="citation news cs1">Schwartz, Oscar (4 July 2019). <a rel="nofollow" class="external text" href="https://www.theguardian.com/technology/2019/jul/04/ai-fake-text-gpt-2-concerns-false-information">"Could 'fake text' be the next global political threat?"</a>. <i>The Guardian</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20190716035703/https://www.theguardian.com/technology/2019/jul/04/ai-fake-text-gpt-2-concerns-false-information">Archived</a> from the original on 16 July 2019<span class="reference-accessdate">. 
Retrieved <span class="nowrap">16 July</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=The+Guardian&rft.atitle=Could+%27fake+text%27+be+the+next+global+political+threat%3F&rft.date=2019-07-04&rft.aulast=Schwartz&rft.aufirst=Oscar&rft_id=https%3A%2F%2Fwww.theguardian.com%2Ftechnology%2F2019%2Fjul%2F04%2Fai-fake-text-gpt-2-concerns-false-information&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-ethics-21"><span class="mw-cite-backlink">^ <a href="#cite_ref-ethics_21-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-ethics_21-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVincent2019" class="citation web cs1">Vincent, James (21 February 2019). <a rel="nofollow" class="external text" href="https://www.theverge.com/2019/2/21/18234500/ai-ethics-debate-researchers-harmful-programs-openai">"AI researchers debate the ethics of sharing potentially harmful programs"</a>. The Verge. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210209123243/https://www.theverge.com/2019/2/21/18234500/ai-ethics-debate-researchers-harmful-programs-openai">Archived</a> from the original on 9 February 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=AI+researchers+debate+the+ethics+of+sharing+potentially+harmful+programs&rft.pub=The+Verge&rft.date=2019-02-21&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2019%2F2%2F21%2F18234500%2Fai-ethics-debate-researchers-harmful-programs-openai&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-pls-22"><span class="mw-cite-backlink">^ <a href="#cite_ref-pls_22-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-pls_22-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFZhang2019" class="citation web cs1">Zhang, Hugh (19 February 2019). <a rel="nofollow" class="external text" href="https://thegradient.pub/openai-please-open-source-your-language-model/">"OpenAI: Please Open Source Your Language Model"</a>. The Gradient. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210128091114/https://thegradient.pub/openai-please-open-source-your-language-model/">Archived</a> from the original on 28 January 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">28 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=OpenAI%3A+Please+Open+Source+Your+Language+Model&rft.pub=The+Gradient&rft.date=2019-02-19&rft.aulast=Zhang&rft.aufirst=Hugh&rft_id=https%3A%2F%2Fthegradient.pub%2Fopenai-please-open-source-your-language-model%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-opengpt2-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-opengpt2_23-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGokaslanCohenPavlickTellex2019" class="citation web cs1">Gokaslan, Aaron; Cohen, Vanya; Pavlick, Ellie; Tellex, Stefanie (22 August 2019). <a rel="nofollow" class="external text" href="https://blog.usejournal.com/opengpt-2-we-replicated-gpt-2-because-you-can-too-45e34e6d36dc?gi=4c998b75b4da">"OpenGPT-2: We Replicated GPT-2 Because You Can Too"</a>. Noteworthy. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230429232442/https://medium.com/@vanya_cohen/opengpt-2-we-replicated-gpt-2-because-you-can-too-45e34e6d36dc">Archived</a> from the original on 29 April 2023<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=OpenGPT-2%3A+We+Replicated+GPT-2+Because+You+Can+Too&rft.pub=Noteworthy&rft.date=2019-08-22&rft.aulast=Gokaslan&rft.aufirst=Aaron&rft.au=Cohen%2C+Vanya&rft.au=Pavlick%2C+Ellie&rft.au=Tellex%2C+Stefanie&rft_id=https%3A%2F%2Fblog.usejournal.com%2Fopengpt-2-we-replicated-gpt-2-because-you-can-too-45e34e6d36dc%3Fgi%3D4c998b75b4da&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-vb-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-vb_24-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFJohnson2019" class="citation web cs1">Johnson, Khari (20 August 2019). <a rel="nofollow" class="external text" href="https://venturebeat.com/2019/08/20/openai-releases-curtailed-version-of-gpt-2-language-model/">"OpenAI releases curtailed version of GPT-2 language model"</a>. <a href="/wiki/VentureBeat" title="VentureBeat">VentureBeat</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201218130530/https://venturebeat.com/2019/08/20/openai-releases-curtailed-version-of-gpt-2-language-model/">Archived</a> from the original on 18 December 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=OpenAI+releases+curtailed+version+of+GPT-2+language+model&rft.pub=VentureBeat&rft.date=2019-08-20&rft.aulast=Johnson&rft.aufirst=Khari&rft_id=https%3A%2F%2Fventurebeat.com%2F2019%2F08%2F20%2Fopenai-releases-curtailed-version-of-gpt-2-language-model%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-reddit-25"><span class="mw-cite-backlink">^ <a href="#cite_ref-reddit_25-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-reddit_25-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVincent2019" class="citation web cs1">Vincent, James (6 June 2019). <a rel="nofollow" class="external text" href="https://www.theverge.com/2019/6/6/18655212/reddit-ai-bots-gpt2-openai-text-artificial-intelligence-subreddit">"There's a subreddit populated entirely by AI personifications of other subreddits"</a>. <i>The Verge</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210221143933/https://www.theverge.com/2019/6/6/18655212/reddit-ai-bots-gpt2-openai-text-artificial-intelligence-subreddit">Archived</a> from the original on 21 February 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Verge&rft.atitle=There%27s+a+subreddit+populated+entirely+by+AI+personifications+of+other+subreddits&rft.date=2019-06-06&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2019%2F6%2F6%2F18655212%2Freddit-ai-bots-gpt2-openai-text-artificial-intelligence-subreddit&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-Murati-26"><span class="mw-cite-backlink">^ <a href="#cite_ref-Murati_26-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Murati_26-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMurati2022" class="citation web cs1">Murati, Ermira (2022-04-13). <a rel="nofollow" class="external text" href="https://www.amacad.org/publication/language-coding-creativity">"Language & Coding Creativity | American Academy of Arts and Sciences"</a>. <i>www.amacad.org</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2024-03-18</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=www.amacad.org&rft.atitle=Language+%26+Coding+Creativity+%7C+American+Academy+of+Arts+and+Sciences&rft.date=2022-04-13&rft.aulast=Murati&rft.aufirst=Ermira&rft_id=https%3A%2F%2Fwww.amacad.org%2Fpublication%2Flanguage-coding-creativity&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-27">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://huggingface.co/gpt2">"GPT-2 Small"</a><span class="reference-accessdate">. Retrieved <span class="nowrap">October 29,</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=GPT-2+Small&rft_id=https%3A%2F%2Fhuggingface.co%2Fgpt2&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-28"><span class="mw-cite-backlink"><b><a href="#cite_ref-28">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGPT-2_Medium" class="citation web cs1">GPT-2 Medium. 
<a rel="nofollow" class="external text" href="https://huggingface.co/openai-community/gpt2-medium">"Openai-community/Gpt2-medium · Hugging Face"</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Openai-community%2FGpt2-medium+%C2%B7+Hugging+Face&rft.au=GPT-2+Medium&rft_id=https%3A%2F%2Fhuggingface.co%2Fopenai-community%2Fgpt2-medium&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span><span class="cs1-maint citation-comment"><code class="cs1-code">{{<a href="/wiki/Template:Cite_web" title="Template:Cite web">cite web</a>}}</code>: CS1 maint: numeric names: authors list (<a href="/wiki/Category:CS1_maint:_numeric_names:_authors_list" title="Category:CS1 maint: numeric names: authors list">link</a>)</span></span> </li> <li id="cite_note-smartcompose-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-smartcompose_29-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVincent2019" class="citation web cs1">Vincent, James (24 July 2019). <a rel="nofollow" class="external text" href="https://www.theverge.com/2019/7/24/20708542/coding-autocompleter-deep-tabnine-ai-deep-learning-smart-compose">"This AI-powered autocompletion software is Gmail's Smart Compose for coders"</a>. <i>The Verge</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210309082626/https://www.theverge.com/2019/7/24/20708542/coding-autocompleter-deep-tabnine-ai-deep-learning-smart-compose">Archived</a> from the original on 9 March 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Verge&rft.atitle=This+AI-powered+autocompletion+software+is+Gmail%27s+Smart+Compose+for+coders&rft.date=2019-07-24&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2019%2F7%2F24%2F20708542%2Fcoding-autocompleter-deep-tabnine-ai-deep-learning-smart-compose&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-aid2-30"><span class="mw-cite-backlink"><b><a href="#cite_ref-aid2_30-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFOlson2019" class="citation web cs1">Olson, Mathew (17 December 2019). <a rel="nofollow" class="external text" href="https://www.usgamer.net/articles/ai-dungeon-2-the-text-adventure-where-you-can-do-nearly-anything-is-now-on-mobile">"AI Dungeon 2, the Text Adventure Where You Can do Nearly Anything, Is Now on Mobile"</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20200920102933/https://www.usgamer.net/articles/ai-dungeon-2-the-text-adventure-where-you-can-do-nearly-anything-is-now-on-mobile">Archived</a> from the original on 20 September 2020<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=AI+Dungeon+2%2C+the+Text+Adventure+Where+You+Can+do+Nearly+Anything%2C+Is+Now+on+Mobile&rft.date=2019-12-17&rft.aulast=Olson&rft.aufirst=Mathew&rft_id=https%3A%2F%2Fwww.usgamer.net%2Farticles%2Fai-dungeon-2-the-text-adventure-where-you-can-do-nearly-anything-is-now-on-mobile&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-aidungeon-31"><span class="mw-cite-backlink"><b><a href="#cite_ref-aidungeon_31-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFNelius2020" class="citation web cs1">Nelius, Joanna (3 August 2020). <a rel="nofollow" class="external text" href="https://gizmodo.com/this-ai-powered-choose-your-own-adventure-text-game-is-1844593111">"This AI-Powered Choose-Your-Own-Adventure Text Game Is Super Fun and Makes No Sense"</a>. <i>Gizmodo</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210228164901/https://gizmodo.com/this-ai-powered-choose-your-own-adventure-text-game-is-1844593111">Archived</a> from the original on 28 February 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Gizmodo&rft.atitle=This+AI-Powered+Choose-Your-Own-Adventure+Text+Game+Is+Super+Fun+and+Makes+No+Sense&rft.date=2020-08-03&rft.aulast=Nelius&rft.aufirst=Joanna&rft_id=https%3A%2F%2Fgizmodo.com%2Fthis-ai-powered-choose-your-own-adventure-text-game-is-1844593111&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-tclat-32"><span class="mw-cite-backlink"><b><a href="#cite_ref-tclat_32-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHa2021" class="citation web cs1">Ha, Anthony (4 February 2021). <a rel="nofollow" class="external text" href="https://techcrunch.com/2021/02/04/latitude-seed-funding/">"AI Dungeon-maker Latitude raises $3.3M to build games with 'infinite' story possibilities"</a>. TechCrunch. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210221011815/https://techcrunch.com/2021/02/04/latitude-seed-funding/">Archived</a> from the original on 21 February 2021<span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=AI+Dungeon-maker+Latitude+raises+%243.3M+to+build+games+with+%27infinite%27+story+possibilities&rft.pub=TechCrunch&rft.date=2021-02-04&rft.aulast=Ha&rft.aufirst=Anthony&rft_id=https%3A%2F%2Ftechcrunch.com%2F2021%2F02%2F04%2Flatitude-seed-funding%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-33"><span class="mw-cite-backlink"><b><a href="#cite_ref-33">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://transformer.huggingface.co/">"Write With Transformer"</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191204060111/https://transformer.huggingface.co/">Archived</a> from the original on December 4, 2019<span class="reference-accessdate">. Retrieved <span class="nowrap">December 4,</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Write+With+Transformer&rft_id=https%3A%2F%2Ftransformer.huggingface.co%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-34"><span class="mw-cite-backlink"><b><a href="#cite_ref-34">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://talktotransformer.com/">"Talk to Transformer"</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191204015009/https://talktotransformer.com/">Archived</a> from the original on December 4, 2019<span class="reference-accessdate">. 
Retrieved <span class="nowrap">December 4,</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Talk+to+Transformer&rft_id=https%3A%2F%2Ftalktotransformer.com%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-35"><span class="mw-cite-backlink"><b><a href="#cite_ref-35">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://creativeengines.ai/">"CreativeEngines"</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230203201104/https://creativeengines.ai/">Archived</a> from the original on February 3, 2023<span class="reference-accessdate">. Retrieved <span class="nowrap">June 25,</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=CreativeEngines&rft_id=https%3A%2F%2Fcreativeengines.ai%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-teens-36"><span class="mw-cite-backlink"><b><a href="#cite_ref-teens_36-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFOhlheiserHao2021" class="citation web cs1">Ohlheiser, Abby; Hao, Karen (26 February 2021). <a rel="nofollow" class="external text" href="https://www.technologyreview.com/2021/02/26/1020010/trevor-project-ai-suicide-hotline-training/">"An AI is training counselors to deal with teens in crisis"</a>. MIT Technology Review. 
<a rel="nofollow" class="external text" href="https://web.archive.org/web/20210227001731/https://www.technologyreview.com/2021/02/26/1020010/trevor-project-ai-suicide-hotline-training/">Archived</a> from the original on 27 February 2021<span class="reference-accessdate">. Retrieved <span class="nowrap">27 February</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=An+AI+is+training+counselors+to+deal+with+teens+in+crisis&rft.pub=MIT+Technology+Review&rft.date=2021-02-26&rft.aulast=Ohlheiser&rft.aufirst=Abby&rft.au=Hao%2C+Karen&rft_id=https%3A%2F%2Fwww.technologyreview.com%2F2021%2F02%2F26%2F1020010%2Ftrevor-project-ai-suicide-hotline-training%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-37"><span class="mw-cite-backlink"><b><a href="#cite_ref-37">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://openai.com/research/language-models-can-explain-neurons-in-language-models">"Language models can explain neurons in language models"</a>. <i>OpenAI</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">13 May</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=OpenAI&rft.atitle=Language+models+can+explain+neurons+in+language+models&rft_id=https%3A%2F%2Fopenai.com%2Fresearch%2Flanguage-models-can-explain-neurons-in-language-models&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-38"><span class="mw-cite-backlink"><b><a href="#cite_ref-38">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFZhuKirosZemelSalakhutdinov2015" class="citation journal cs1">Zhu, Yukun; Kiros, Ryan; Zemel, Rich; Salakhutdinov, Ruslan; Urtasun, Raquel; Torralba, Antonio; Fidler, Sanja (2015). <a rel="nofollow" class="external text" href="https://www.cv-foundation.org/openaccess/content_iccv_2015/html/Zhu_Aligning_Books_and_ICCV_2015_paper.html">"Aligning Books and Movies: Towards Story-Like Visual Explanations by Watching Movies and Reading Books"</a>. <i>International Conference on Computer Vision 2015</i>: <span class="nowrap">19–</span>27. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1506.06724">1506.06724</a></span>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230205222219/https://www.cv-foundation.org/openaccess/content_iccv_2015/html/Zhu_Aligning_Books_and_ICCV_2015_paper.html">Archived</a> from the original on 2023-02-05<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2023-02-05</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=International+Conference+on+Computer+Vision+2015&rft.atitle=Aligning+Books+and+Movies%3A+Towards+Story-Like+Visual+Explanations+by+Watching+Movies+and+Reading+Books&rft.pages=19-27&rft.date=2015&rft_id=info%3Aarxiv%2F1506.06724&rft.aulast=Zhu&rft.aufirst=Yukun&rft.au=Kiros%2C+Ryan&rft.au=Zemel%2C+Rich&rft.au=Salakhutdinov%2C+Ruslan&rft.au=Urtasun%2C+Raquel&rft.au=Torralba%2C+Antonio&rft.au=Fidler%2C+Sanja&rft_id=https%3A%2F%2Fwww.cv-foundation.org%2Fopenaccess%2Fcontent_iccv_2015%2Fhtml%2FZhu_Aligning_Books_and_ICCV_2015_paper.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-gpt3paper-39"><span class="mw-cite-backlink"><b><a href="#cite_ref-gpt3paper_39-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBrownMannRyderSubbiah2020" class="citation arxiv cs1">Brown, Tom B.; Mann, Benjamin; Ryder, Nick; Subbiah, Melanie; Kaplan, Jared; Dhariwal, Prafulla; Neelakantan, Arvind; Shyam, Pranav; Sastry, Girish; Askell, Amanda; Agarwal, Sandhini; Herbert-Voss, Ariel; Krueger, Gretchen; Henighan, Tom; Child, Rewon; Ramesh, Aditya; Ziegler, Daniel M.; Wu, Jeffrey; Winter, Clemens; Hesse, Christopher; Chen, Mark; Sigler, Eric; Litwin, Mateusz; Gray, Scott; Chess, Benjamin; Clark, Jack; Berner, Christopher; McCandlish, Sam; Radford, Alec; Sutskever, Ilya; Amodei, Dario (July 22, 2020). "Language Models are Few-Shot Learners". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2005.14165">2005.14165</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Language+Models+are+Few-Shot+Learners&rft.date=2020-07-22&rft_id=info%3Aarxiv%2F2005.14165&rft.aulast=Brown&rft.aufirst=Tom+B.&rft.au=Mann%2C+Benjamin&rft.au=Ryder%2C+Nick&rft.au=Subbiah%2C+Melanie&rft.au=Kaplan%2C+Jared&rft.au=Dhariwal%2C+Prafulla&rft.au=Neelakantan%2C+Arvind&rft.au=Shyam%2C+Pranav&rft.au=Sastry%2C+Girish&rft.au=Askell%2C+Amanda&rft.au=Agarwal%2C+Sandhini&rft.au=Herbert-Voss%2C+Ariel&rft.au=Krueger%2C+Gretchen&rft.au=Henighan%2C+Tom&rft.au=Child%2C+Rewon&rft.au=Ramesh%2C+Aditya&rft.au=Ziegler%2C+Daniel+M.&rft.au=Wu%2C+Jeffrey&rft.au=Winter%2C+Clemens&rft.au=Hesse%2C+Christopher&rft.au=Chen%2C+Mark&rft.au=Sigler%2C+Eric&rft.au=Litwin%2C+Mateusz&rft.au=Gray%2C+Scott&rft.au=Chess%2C+Benjamin&rft.au=Clark%2C+Jack&rft.au=Berner%2C+Christopher&rft.au=McCandlish%2C+Sam&rft.au=Radford%2C+Alec&rft.au=Sutskever%2C+Ilya&rft.au=Amodei%2C+Dario&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-Arram_20200709-40"><span class="mw-cite-backlink"><b><a href="#cite_ref-Arram_20200709_40-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFArram2020" class="citation web cs1">Arram (July 9, 2020). <a rel="nofollow" class="external text" href="https://arr.am/2020/07/09/gpt-3-an-ai-thats-eerily-good-at-writing-almost-anything/">"GPT-3: An AI that's eerily good at writing almost anything"</a>. <i>Arram Sabeti</i>. 
<a rel="nofollow" class="external text" href="https://web.archive.org/web/20200720192137/https://arr.am/2020/07/09/gpt-3-an-ai-thats-eerily-good-at-writing-almost-anything/">Archived</a> from the original on July 20, 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">July 31,</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Arram+Sabeti&rft.atitle=GPT-3%3A+An+AI+that%27s+eerily+good+at+writing+almost+anything&rft.date=2020-07-09&rft.au=Arram&rft_id=https%3A%2F%2Farr.am%2F2020%2F07%2F09%2Fgpt-3-an-ai-thats-eerily-good-at-writing-almost-anything%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> <li id="cite_note-GPT3microsoft-41"><span class="mw-cite-backlink"><b><a href="#cite_ref-GPT3microsoft_41-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHao2020" class="citation magazine cs1">Hao, Karen (September 23, 2020). <a rel="nofollow" class="external text" href="https://www.technologyreview.com/2020/09/23/1008729/openai-is-giving-microsoft-exclusive-access-to-its-gpt-3-language-model/">"OpenAI is giving Microsoft exclusive access to its GPT-3 language model"</a>. <i><a href="/wiki/MIT_Technology_Review" title="MIT Technology Review">MIT Technology Review</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20210205121656/https://www.technologyreview.com/2020/09/23/1008729/openai-is-giving-microsoft-exclusive-access-to-its-gpt-3-language-model/">Archived</a> from the original on 2021-02-05<span class="reference-accessdate">. Retrieved <span class="nowrap">2020-09-25</span></span>. <q>The companies say OpenAI will continue to offer its public-facing API, which allows chosen users to send text to GPT-3 or OpenAI's other models and receive its output. 
Only Microsoft, however, will have access to GPT-3's underlying code, allowing it to embed, repurpose, and modify the model as it pleases.</q></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=MIT+Technology+Review&rft.atitle=OpenAI+is+giving+Microsoft+exclusive+access+to+its+GPT-3+language+model&rft.date=2020-09-23&rft.aulast=Hao&rft.aufirst=Karen&rft_id=https%3A%2F%2Fwww.technologyreview.com%2F2020%2F09%2F23%2F1008729%2Fopenai-is-giving-microsoft-exclusive-access-to-its-gpt-3-language-model%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AGPT-2" class="Z3988"></span></span> </li> </ol></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output 
.navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="OpenAI158" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="3"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:OpenAI" title="Template:OpenAI"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:OpenAI" title="Template talk:OpenAI"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:OpenAI" title="Special:EditPage/Template:OpenAI"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="OpenAI158" style="font-size:114%;margin:0 4em"><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></div></th></tr><tr><th scope="row" class="navbox-group" 
style="width:1%">Products</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Chatbot" title="Chatbot">Chatbots</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a> <ul><li><a href="/wiki/ChatGPT_in_education" title="ChatGPT in education">in education</a></li> <li><a href="/wiki/GPT_Store" title="GPT Store">GPT Store</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/SearchGPT" class="mw-redirect" title="SearchGPT">SearchGPT</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech recognition system)">Whisper</a></li></ul></li> <li><a href="/wiki/GitHub_Copilot" title="GitHub Copilot">GitHub Copilot</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Foundation_model" title="Foundation model">Foundation models</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/OpenAI_Codex" title="OpenAI Codex">OpenAI Codex</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">Generative pre-trained transformer</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">GPT-1</a></li> <li><a class="mw-selflink selflink">GPT-2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">GPT-3</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">GPT-4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">GPT-4o</a></li> <li><a href="/wiki/GPT-4.5" title="GPT-4.5">GPT-4.5</a></li> <li><a 
href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/AI_agent" class="mw-redirect" title="AI agent">AI agents</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Deep_Research" class="mw-redirect" title="Deep Research">Deep Research</a></li> <li><a href="/wiki/OpenAI_Operator" title="OpenAI Operator">Operator</a></li></ul> </div></td></tr></tbody></table><div></div></td><td class="noviewer navbox-image" rowspan="4" style="width:1px;padding:0 0 0 2px"><div><span typeof="mw:File"><a href="/wiki/File:OpenAI_logo_2025_(wordmark).svg" class="mw-file-description"><img alt="OpenAI logo" src="//upload.wikimedia.org/wikipedia/commons/thumb/a/af/OpenAI_logo_2025_%28wordmark%29.svg/150px-OpenAI_logo_2025_%28wordmark%29.svg.png" decoding="async" width="150" height="40" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/a/af/OpenAI_logo_2025_%28wordmark%29.svg/225px-OpenAI_logo_2025_%28wordmark%29.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/a/af/OpenAI_logo_2025_%28wordmark%29.svg/300px-OpenAI_logo_2025_%28wordmark%29.svg.png 2x" data-file-width="512" data-file-height="138" /></a></span></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Senior_management" title="Senior management">Senior management</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" 
style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Current</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Sam_Altman" title="Sam Altman">Sam Altman</a> <ul><li><a href="/wiki/Removal_of_Sam_Altman_from_OpenAI" title="Removal of Sam Altman from OpenAI">removal</a></li></ul></li> <li><a href="/wiki/Greg_Brockman" title="Greg Brockman">Greg Brockman</a></li> <li><a href="/wiki/Sarah_Friar" title="Sarah Friar">Sarah Friar</a></li> <li><a href="/wiki/Scott_Schools" title="Scott Schools">Scott Schools</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Former</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Mira_Murati" title="Mira Murati">Mira Murati</a></li> <li><a href="/wiki/Emmett_Shear" title="Emmett Shear">Emmett Shear</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Board_of_directors" title="Board of directors">Board of directors</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Current</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Sam_Altman" title="Sam Altman">Sam Altman</a></li> <li><a href="/wiki/Adam_D%27Angelo" title="Adam D'Angelo">Adam D'Angelo</a></li> <li><a href="/wiki/Sue_Desmond-Hellmann" title="Sue Desmond-Hellmann">Sue Desmond-Hellmann</a></li> <li><a href="/wiki/Paul_Nakasone" title="Paul Nakasone">Paul Nakasone</a></li> <li><a href="/wiki/Adebayo_Ogunlesi" title="Adebayo 
Ogunlesi">Adebayo Ogunlesi</a></li> <li><a href="/wiki/Nicole_Seligman" title="Nicole Seligman">Nicole Seligman</a></li> <li><a href="/wiki/Fidji_Simo" title="Fidji Simo">Fidji Simo</a></li> <li><a href="/wiki/Lawrence_Summers" title="Lawrence Summers">Lawrence Summers</a></li> <li><a href="/wiki/Bret_Taylor" title="Bret Taylor">Bret Taylor</a> (chair)</li> <li><a href="/wiki/Jakub_Pachocki" title="Jakub Pachocki">Jakub Pachocki</a> (chief scientist)</li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Former</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Greg_Brockman" title="Greg Brockman">Greg Brockman</a> (2017–2023)</li> <li><a href="/wiki/Reid_Hoffman" title="Reid Hoffman">Reid Hoffman</a> (2019–2023)</li> <li><a href="/wiki/Will_Hurd" title="Will Hurd">Will Hurd</a> (2021–2023)</li> <li><a href="/wiki/Holden_Karnofsky" title="Holden Karnofsky">Holden Karnofsky</a> (2017–2021)</li> <li><a href="/wiki/Elon_Musk" title="Elon Musk">Elon Musk</a> (2015–2018)</li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a> (2017–2023)</li> <li><a href="/wiki/Helen_Toner" title="Helen Toner">Helen Toner</a> (2021–2023)</li> <li><a href="/wiki/Shivon_Zilis" title="Shivon Zilis">Shivon Zilis</a> (2019–2023)</li></ul> </div></td></tr></tbody></table><div></div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Joint_venture" title="Joint venture">Joint ventures</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Stargate_LLC" title="Stargate LLC">Stargate LLC</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Related</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div 
style="padding:0 0.25em"> <ul><li><a href="/wiki/Apple_Intelligence" title="Apple Intelligence">Apple Intelligence</a></li> <li><i><a href="/wiki/AI_Dungeon" title="AI Dungeon">AI Dungeon</a></i></li> <li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li> <li>"<a href="/wiki/Deep_Learning_(South_Park)" title="Deep Learning (South Park)">Deep Learning</a>"</li> <li><a href="/wiki/LangChain" title="LangChain">LangChain</a></li> <li><a href="/wiki/Microsoft_Copilot" title="Microsoft Copilot">Microsoft Copilot</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="3"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <a href="/wiki/Category:OpenAI" title="Category:OpenAI">Category</a></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235" /></div><div role="navigation" class="navbox" aria-labelledby="Artificial_intelligence_(AI)752" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link 
rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Artificial_intelligence_navbox" title="Template:Artificial intelligence navbox"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Artificial_intelligence_navbox" title="Template talk:Artificial intelligence navbox"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Artificial_intelligence_navbox" title="Special:EditPage/Template:Artificial intelligence navbox"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Artificial_intelligence_(AI)752" style="font-size:114%;margin:0 4em"><a href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence</a> (AI)</div></th></tr><tr><td class="navbox-abovebelow" colspan="2"><div><a href="/wiki/History_of_artificial_intelligence" title="History of artificial intelligence">History</a> (<a href="/wiki/Timeline_of_artificial_intelligence" title="Timeline of artificial intelligence">timeline</a>)</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parameter" title="Parameter">Parameter</a> <ul><li><a href="/wiki/Hyperparameter_(machine_learning)" title="Hyperparameter (machine learning)">Hyperparameter</a></li></ul></li> <li><a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">Loss functions</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a> <ul><li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a 
href="/wiki/Double_descent" title="Double descent">Double descent</a></li> <li><a href="/wiki/Overfitting" title="Overfitting">Overfitting</a></li></ul></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Gradient_descent" title="Gradient descent">Gradient descent</a> <ul><li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a></li> <li><a href="/wiki/Quasi-Newton_method" title="Quasi-Newton method">Quasi-Newton method</a></li> <li><a href="/wiki/Conjugate_gradient_method" title="Conjugate gradient method">Conjugate gradient method</a></li></ul></li> <li><a href="/wiki/Backpropagation" title="Backpropagation">Backpropagation</a></li> <li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention</a></li> <li><a href="/wiki/Convolution" title="Convolution">Convolution</a></li> <li><a href="/wiki/Normalization_(machine_learning)" title="Normalization (machine learning)">Normalization</a> <ul><li><a href="/wiki/Batch_normalization" title="Batch normalization">Batchnorm</a></li></ul></li> <li><a href="/wiki/Activation_function" title="Activation function">Activation</a> <ul><li><a href="/wiki/Softmax_function" title="Softmax function">Softmax</a></li> <li><a href="/wiki/Sigmoid_function" title="Sigmoid function">Sigmoid</a></li> <li><a href="/wiki/Rectifier_(neural_networks)" title="Rectifier (neural networks)">Rectifier</a></li></ul></li> <li><a href="/wiki/Gating_mechanism" title="Gating mechanism">Gating</a></li> <li><a href="/wiki/Weight_initialization" title="Weight initialization">Weight initialization</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a></li> <li><a href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">Datasets</a> <ul><li><a href="/wiki/Data_augmentation" title="Data augmentation">Augmentation</a></li></ul></li> 
<li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Imitation_learning" title="Imitation learning">Imitation</a></li> <li><a href="/wiki/Policy_gradient_method" title="Policy gradient method">Policy gradient</a></li></ul></li> <li><a href="/wiki/Diffusion_process" title="Diffusion process">Diffusion</a></li> <li><a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">Latent diffusion model</a></li> <li><a href="/wiki/Autoregressive_model" title="Autoregressive model">Autoregression</a></li> <li><a href="/wiki/Adversarial_machine_learning" title="Adversarial machine learning">Adversary</a></li> <li><a href="/wiki/Retrieval-augmented_generation" title="Retrieval-augmented generation">RAG</a></li> <li><a href="/wiki/Uncanny_valley" title="Uncanny valley">Uncanny valley</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Recursive_self-improvement" title="Recursive self-improvement">Recursive self-improvement</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Applications</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a 
href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> <ul><li><a href="/wiki/Prompt_engineering#In-context_learning" title="Prompt engineering">In-context learning</a></li></ul></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Artificial neural network</a> <ul><li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li></ul></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">NMT</a></li></ul></li> <li><a href="/wiki/Artificial_general_intelligence" title="Artificial general intelligence">Artificial general intelligence</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Implementations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Audio–visual</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li> <li><a href="/wiki/Human_image_synthesis" title="Human image synthesis">Human image synthesis</a></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">HWR</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a></li> <li><a href="/wiki/Deep_learning_speech_synthesis" title="Deep learning speech synthesis">Speech synthesis</a> <ul><li><a href="/wiki/15.ai" title="15.ai">15.ai</a></li> <li><a href="/wiki/ElevenLabs" 
title="ElevenLabs">ElevenLabs</a></li></ul></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a> <ul><li><a href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech recognition system)">Whisper</a></li></ul></li> <li><a href="/wiki/Facial_recognition_system" title="Facial recognition system">Facial recognition</a></li> <li><a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a></li> <li><a href="/wiki/Text-to-image_model" title="Text-to-image model">Text-to-image models</a> <ul><li><a href="/wiki/Aurora_(text-to-image_model)" class="mw-redirect" title="Aurora (text-to-image model)">Aurora</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Imagen_(text-to-image_model)" title="Imagen (text-to-image model)">Imagen</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul></li> <li><a href="/wiki/Text-to-video_model" title="Text-to-video model">Text-to-video models</a> <ul><li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/Runway_(company)#Gen-3_Alpha" title="Runway (company)">Gen-3 Alpha</a></li> <li><a href="/wiki/MiniMax_(company)#Hailuo_AI" title="MiniMax (company)">Hailuo AI</a></li> <li><a href="/wiki/Kling_(text-to-video_model)" class="mw-redirect" title="Kling (text-to-video model)">Kling</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Google_DeepMind#Video_model" title="Google DeepMind">Veo</a></li></ul></li> <li><a 
href="/wiki/Music_and_artificial_intelligence" title="Music and artificial intelligence">Music generation</a> <ul><li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li> <li><a href="/wiki/Udio" title="Udio">Udio</a></li></ul></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/T5_(language_model)" title="T5 (language model)">T5</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Chinchilla_(language_model)" title="Chinchilla (language model)">Chinchilla AI</a></li> <li><a href="/wiki/PaLM" title="PaLM">PaLM</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a class="mw-selflink selflink">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/GPT-4.5" title="GPT-4.5">4.5</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/Gemini_(language_model)" title="Gemini (language model)">Gemini</a> <ul><li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">chatbot</a></li></ul></li> <li><a 
href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/LaMDA" title="LaMDA">LaMDA</a></li> <li><a href="/wiki/BLOOM_(language_model)" title="BLOOM (language model)">BLOOM</a></li> <li><a href="/wiki/Project_Debater" title="Project Debater">Project Debater</a></li> <li><a href="/wiki/IBM_Watson" title="IBM Watson">IBM Watson</a></li> <li><a href="/wiki/IBM_Watsonx" title="IBM Watsonx">IBM Watsonx</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li> <li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Decisional</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlphaGo" title="AlphaGo">AlphaGo</a></li> <li><a href="/wiki/AlphaZero" title="AlphaZero">AlphaZero</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a href="/wiki/Self-driving_car" title="Self-driving car">Self-driving car</a></li> <li><a href="/wiki/MuZero" title="MuZero">MuZero</a></li> <li><a href="/wiki/Action_selection" title="Action selection">Action selection</a> <ul><li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li></ul></li> <li><a href="/wiki/Robot_control" title="Robot control">Robot control</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a></li> <li><a href="/wiki/Warren_Sturgis_McCulloch" title="Warren Sturgis McCulloch">Warren Sturgis McCulloch</a></li> <li><a href="/wiki/Walter_Pitts" title="Walter 
Pitts">Walter Pitts</a></li> <li><a href="/wiki/John_von_Neumann" title="John von Neumann">John von Neumann</a></li> <li><a href="/wiki/Claude_Shannon" title="Claude Shannon">Claude Shannon</a></li> <li><a href="/wiki/Marvin_Minsky" title="Marvin Minsky">Marvin Minsky</a></li> <li><a href="/wiki/John_McCarthy_(computer_scientist)" title="John McCarthy (computer scientist)">John McCarthy</a></li> <li><a href="/wiki/Nathaniel_Rochester_(computer_scientist)" title="Nathaniel Rochester (computer scientist)">Nathaniel Rochester</a></li> <li><a href="/wiki/Allen_Newell" title="Allen Newell">Allen Newell</a></li> <li><a href="/wiki/Cliff_Shaw" title="Cliff Shaw">Cliff Shaw</a></li> <li><a href="/wiki/Herbert_A._Simon" title="Herbert A. Simon">Herbert A. Simon</a></li> <li><a href="/wiki/Oliver_Selfridge" title="Oliver Selfridge">Oliver Selfridge</a></li> <li><a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a></li> <li><a href="/wiki/Bernard_Widrow" title="Bernard Widrow">Bernard Widrow</a></li> <li><a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a></li> <li><a href="/wiki/Seymour_Papert" title="Seymour Papert">Seymour Papert</a></li> <li><a href="/wiki/Seppo_Linnainmaa" title="Seppo Linnainmaa">Seppo Linnainmaa</a></li> <li><a href="/wiki/Paul_Werbos" title="Paul Werbos">Paul Werbos</a></li> <li><a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a></li> <li><a href="/wiki/Yann_LeCun" title="Yann LeCun">Yann LeCun</a></li> <li><a href="/wiki/Geoffrey_Hinton" title="Geoffrey Hinton">Geoffrey Hinton</a></li> <li><a href="/wiki/John_Hopfield" title="John Hopfield">John Hopfield</a></li> <li><a href="/wiki/Yoshua_Bengio" title="Yoshua Bengio">Yoshua Bengio</a></li> <li><a href="/wiki/Lotfi_A._Zadeh" title="Lotfi A. Zadeh">Lotfi A. 
Zadeh</a></li> <li><a href="/wiki/Stephen_Grossberg" title="Stephen Grossberg">Stephen Grossberg</a></li> <li><a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a></li> <li><a href="/wiki/Andrew_Ng" title="Andrew Ng">Andrew Ng</a></li> <li><a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Fei-Fei Li</a></li> <li><a href="/wiki/Alex_Krizhevsky" title="Alex Krizhevsky">Alex Krizhevsky</a></li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a></li> <li><a href="/wiki/Demis_Hassabis" title="Demis Hassabis">Demis Hassabis</a></li> <li><a href="/wiki/David_Silver_(computer_scientist)" title="David Silver (computer scientist)">David Silver</a></li> <li><a href="/wiki/Ian_Goodfellow" title="Ian Goodfellow">Ian Goodfellow</a></li> <li><a href="/wiki/Andrej_Karpathy" title="Andrej Karpathy">Andrej Karpathy</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Architectures</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Neural_Turing_machine" title="Neural Turing machine">Neural Turing machine</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer (ViT)</a></li></ul></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network (RNN)</a></li> <li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">Long short-term memory (LSTM)</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit (GRU)</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">Echo state 
network</a></li> <li><a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">Multilayer perceptron (MLP)</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network (CNN)</a></li> <li><a href="/wiki/Residual_neural_network" title="Residual neural network">Residual neural network (ResNet)</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder (VAE)</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network (GAN)</a></li> <li><a href="/wiki/Graph_neural_network" title="Graph neural network">Graph neural network (GNN)</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Symbol_portal_class.svg" class="mw-file-description" title="Portal"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/16px-Symbol_portal_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/23px-Symbol_portal_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/31px-Symbol_portal_class.svg.png 2x" data-file-width="180" data-file-height="185" /></a></span> Portals <ul><li><a href="/wiki/Portal:Technology" title="Portal:Technology">Technology</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" 
width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <a href="/wiki/Category:Artificial_intelligence" title="Category:Artificial intelligence">Category</a> <ul><li><a href="/wiki/Category:Artificial_neural_networks" title="Category:Artificial neural networks">Artificial neural networks</a></li> <li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="List-Class article"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/16px-Symbol_list_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/23px-Symbol_list_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/31px-Symbol_list_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> List <ul><li><a href="/wiki/List_of_artificial_intelligence_companies" title="List of artificial intelligence companies">Companies</a></li> <li><a href="/wiki/List_of_artificial_intelligence_projects" title="List of artificial intelligence projects">Projects</a></li></ul></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235" /></div><div role="navigation" class="navbox" aria-labelledby="Generative_AI409" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" 
style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Generative_AI" title="Template:Generative AI"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Generative_AI" title="Template talk:Generative AI"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Generative_AI" title="Special:EditPage/Template:Generative AI"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Generative_AI409" style="font-size:114%;margin:0 4em"><a href="/wiki/Generative_artificial_intelligence" title="Generative artificial intelligence">Generative AI</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">Generative pre-trained transformer</a></li> <li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Neural network</a></li> <li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Retrieval-augmented_generation" 
title="Retrieval-augmented generation">Retrieval-augmented generation</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">Reinforcement learning from human feedback</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder</a></li> <li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Models</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/DBRX" title="DBRX">DBRX</a></li> <li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek</a></li> <li><a href="/wiki/Ernie_Bot" title="Ernie Bot">ERNIE</a></li> <li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">Gemini</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a class="mw-selflink selflink">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a 
href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/GPT-4.5" title="GPT-4.5">4.5</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Mistral_AI#Mistral_Large" title="Mistral AI">Mistral Large</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text-to-image_model" title="Text-to-image model">Image</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Aurora_(text-to-image_model)" class="mw-redirect" title="Aurora (text-to-image model)">Aurora</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Imagen_(text-to-image_model)" title="Imagen (text-to-image model)">Imagen</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Speech</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/15.ai" title="15.ai">15.ai</a></li> <li><a 
href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text-to-video_model" title="Text-to-video model">Video</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/Runway_(company)#Gen-3_Alpha" title="Runway (company)">Gen-3 Alpha</a></li> <li><a href="/wiki/MiniMax_(company)#Hailuo_AI" title="MiniMax (company)">Hailuo AI</a></li> <li><a href="/wiki/Kling_(text-to-video_model)" class="mw-redirect" title="Kling (text-to-video model)">Kling</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Google_DeepMind#Video_model" title="Google DeepMind">Veo</a></li> <li><a href="/wiki/VideoPoet" title="VideoPoet">VideoPoet</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Music</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Udio" title="Udio">Udio</a></li> <li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/List_of_artificial_intelligence_companies" title="List of artificial intelligence companies">Companies</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/01.AI" title="01.AI">01.AI</a></li> <li><a href="/wiki/Alibaba_Group" title="Alibaba Group">Alibaba</a></li> <li><a href="/wiki/Anthropic" title="Anthropic">Anthropic</a></li> <li><a href="/wiki/Baichuan" title="Baichuan">Baichuan</a></li> <li><a href="/wiki/Baidu" 
title="Baidu">Baidu</a></li> <li><a href="/wiki/DeepSeek" title="DeepSeek">DeepSeek</a></li> <li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li> <li><a href="/wiki/Google_DeepMind" title="Google DeepMind">Google DeepMind</a></li> <li><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></li> <li><a href="/wiki/Kuaishou" title="Kuaishou">Kuaishou</a></li> <li><a href="/wiki/Meta_AI" title="Meta AI">Meta AI</a></li> <li><a href="/wiki/MiniMax_(company)" title="MiniMax (company)">MiniMax</a></li> <li><a href="/wiki/Mistral_AI" title="Mistral AI">Mistral AI</a></li> <li><a href="/wiki/Moonshot_AI" title="Moonshot AI">Moonshot AI</a></li> <li><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></li> <li><a href="/wiki/Runway_(company)" title="Runway (company)">Runway</a></li> <li><a href="/wiki/Stability_AI" title="Stability AI">Stability AI</a></li> <li><a href="/wiki/Synthesia_(company)" title="Synthesia (company)">Synthesia</a></li> <li><a href="/wiki/XAI_(company)" title="XAI (company)">xAI</a></li> <li><a href="/wiki/Zhipu_AI" title="Zhipu AI">Zhipu AI</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <b><a href="/wiki/Category:Generative_artificial_intelligence" title="Category:Generative artificial intelligence">Category</a></b></li> <li><span class="noviewer" typeof="mw:File"><span title="Commons page"><img alt="" 
src="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/20px-Commons-logo.svg.png" decoding="async" width="12" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/40px-Commons-logo.svg.png 2x" data-file-width="1024" data-file-height="1376" /></span></span> <b><a href="https://commons.wikimedia.org/wiki/Category:Generative_artificial_intelligence" class="extiw" title="commons:Category:Generative artificial intelligence">Commons</a></b></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐api‐ext.eqiad.main‐df448499c‐xb7wd Cached time: 20250318164343 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 1.177 seconds Real time usage: 1.461 seconds Preprocessor visited node count: 4251/1000000 Post‐expand include size: 212983/2097152 bytes Template argument size: 4863/2097152 bytes Highest expansion depth: 13/100 Expensive parser function count: 5/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 194251/5000000 bytes Lua time usage: 0.708/10.000 seconds Lua memory usage: 7086171/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 1163.630 1 -total 26.42% 307.374 29 Template:Cite_web 14.63% 170.274 1 Template:Machine_learning 14.03% 163.221 1 Template:Sidebar_with_collapsible_lists 10.97% 127.678 9 Template:Navbox 10.62% 123.595 1 Template:Infobox_software 10.59% 123.220 1 Template:Short_description 10.00% 116.415 1 Template:Infobox 7.56% 87.980 1 Template:OpenAI_navbox 7.22% 84.029 2 Template:Pagetype --> <!-- Saved in parser cache with key enwiki:pcache:66045029:|#|:idhash:canonical and timestamp 20250318164343 and revision id 1278972269. 
Rendering was triggered because: unknown --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><!-- Query-string separators in the URLs below are written as &amp; so a raw "&" is never parsed as the start of a character reference. --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?useformat=desktop&amp;type=1x1&amp;usesul3=0" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=GPT-2&amp;oldid=1278972269">https://en.wikipedia.org/w/index.php?title=GPT-2&amp;oldid=1278972269</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Large_language_models" title="Category:Large language models">Large language models</a></li><li><a href="/wiki/Category:Generative_pre-trained_transformers" title="Category:Generative pre-trained transformers">Generative pre-trained transformers</a></li><li><a href="/wiki/Category:Software_using_the_MIT_license" title="Category:Software using the MIT license">Software using the MIT license</a></li><li><a href="/wiki/Category:OpenAI" title="Category:OpenAI">OpenAI</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:CS1_maint:_numeric_names:_authors_list" title="Category:CS1 maint: numeric names: authors list">CS1 maint: numeric names: authors list</a></li><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_is_different_from_Wikidata" title="Category:Short description is different from Wikidata">Short description is different from Wikidata</a></li><li><a href="/wiki/Category:All_articles_with_unsourced_statements" title="Category:All articles with unsourced statements">All articles 
with unsourced statements</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_June_2024" title="Category:Articles with unsourced statements from June 2024">Articles with unsourced statements from June 2024</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 5 March 2025, at 19:22<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <!-- Site-wide footer links; the "&" in the mobile-view toggle URL is escaped as &amp; so it cannot be read as a character reference. --> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=GPT-2&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/static/images/footer/wikimedia-button.svg" width="84" height="29"><img src="/static/images/footer/wikimedia.svg" width="25" height="25" alt="Wikimedia Foundation" lang="en" loading="lazy"></picture></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large 
cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/w/resources/assets/poweredby_mediawiki.svg" width="88" height="31"><img src="/w/resources/assets/mediawiki_compact.svg" alt="Powered by MediaWiki" lang="en" width="25" height="25" loading="lazy"></picture></a></li> </ul> </footer> </div> </div> </div> <div class="vector-header-container vector-sticky-header-container"> <div id="vector-sticky-header" class="vector-sticky-header"> <div class="vector-sticky-header-start"> <div class="vector-sticky-header-icon-start vector-button-flush-left vector-button-flush-right" aria-hidden="true"> <button class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-sticky-header-search-toggle" tabindex="-1" data-event-name="ui.vector-sticky-search-form.icon"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </button> </div> <div role="search" class="vector-search-box-vue vector-search-box-show-thumbnail vector-search-box"> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail"> <!-- Sticky-header search form. The text field has no visible label, so aria-label supplies its accessible name (a placeholder alone is not a label); the end button declares type="submit" explicitly. --> <form action="/w/index.php" id="vector-sticky-search-form" class="cdx-search-input cdx-search-input--has-end-button"> <div class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia"> <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button type="submit" class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <div class="vector-sticky-header-context-bar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-sticky-header-toc" class="vector-dropdown mw-portlet mw-portlet-sticky-header-toc vector-sticky-header-toc vector-button-flush-left" > <input 
type="checkbox" id="vector-sticky-header-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-sticky-header-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-sticky-header-toc-label" for="vector-sticky-header-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-sticky-header-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div class="vector-sticky-header-context-bar-primary" aria-hidden="true" ><span class="mw-page-title-main">GPT-2</span></div> </div> </div>
<!-- End section of the sticky header: icon-only links, text buttons, then user links.
     The container is aria-hidden and every control inside carries tabindex="-1".
     All anchors here use href="#" and the icon links have an empty label span.
     NOTE(review): presumably skin JavaScript fills in the real targets and labels at runtime; confirm before replacing these placeholders. -->
<div class="vector-sticky-header-end" aria-hidden="true"> <div class="vector-sticky-header-icons"> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-talk-sticky-header" tabindex="-1" data-event-name="talk-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbles mw-ui-icon-wikimedia-speechBubbles"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-subject-sticky-header" tabindex="-1" data-event-name="subject-sticky-header"><span class="vector-icon mw-ui-icon-article mw-ui-icon-wikimedia-article"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-history-sticky-header" tabindex="-1" data-event-name="history-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-history 
mw-ui-icon-wikimedia-wikimedia-history"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only mw-watchlink" id="ca-watchstar-sticky-header" tabindex="-1" data-event-name="watch-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-star mw-ui-icon-wikimedia-wikimedia-star"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-edit-sticky-header" tabindex="-1" data-event-name="wikitext-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-wikiText mw-ui-icon-wikimedia-wikimedia-wikiText"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-ve-edit-sticky-header" tabindex="-1" data-event-name="ve-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-edit mw-ui-icon-wikimedia-wikimedia-edit"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-viewsource-sticky-header" tabindex="-1" data-event-name="ve-edit-protected-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-editLock mw-ui-icon-wikimedia-wikimedia-editLock"></span> <span></span> </a> </div>
<!-- Text buttons of the sticky header: the language selector ("13 languages") and the "Add topic" link. -->
<div class="vector-sticky-header-buttons"> <button class="cdx-button cdx-button--weight-quiet mw-interlanguage-selector" id="p-lang-btn-sticky-header" tabindex="-1" data-event-name="ui.dropdown-p-lang-btn-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-language mw-ui-icon-wikimedia-wikimedia-language"></span> <span>13 languages</span> </button> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive" id="ca-addsection-sticky-header" 
tabindex="-1" data-event-name="addsection-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbleAdd-progressive mw-ui-icon-wikimedia-speechBubbleAdd-progressive"></span> <span>Add topic</span> </a> </div> <div class="vector-sticky-header-icon-end"> <div class="vector-user-links"> </div> </div> </div> </div> </div>
<!-- Portlet with id p-dock-bottom; its list contains no items in this render. -->
<div class="mw-portlet mw-portlet-dock-bottom emptyPortlet" id="p-dock-bottom"> <ul> </ul> </div>
<!-- Pushes a callback onto window.RLQ (creating the array if absent). The callback calls mw.config.set
     with wgHostname, wgBackendResponseTime and wgPageParseReport (limit report, scribunto usage, cache report). -->
<script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.eqiad.main-78bdfcd464-pk2w7","wgBackendResponseTime":212,"wgPageParseReport":{"limitreport":{"cputime":"1.177","walltime":"1.461","ppvisitednodes":{"value":4251,"limit":1000000},"postexpandincludesize":{"value":212983,"limit":2097152},"templateargumentsize":{"value":4863,"limit":2097152},"expansiondepth":{"value":13,"limit":100},"expensivefunctioncount":{"value":5,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":194251,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 1163.630 1 -total"," 26.42% 307.374 29 Template:Cite_web"," 14.63% 170.274 1 Template:Machine_learning"," 14.03% 163.221 1 Template:Sidebar_with_collapsible_lists"," 10.97% 127.678 9 Template:Navbox"," 10.62% 123.595 1 Template:Infobox_software"," 10.59% 123.220 1 Template:Short_description"," 10.00% 116.415 1 Template:Infobox"," 7.56% 87.980 1 Template:OpenAI_navbox"," 7.22% 84.029 2 Template:Pagetype"]},"scribunto":{"limitreport-timeusage":{"value":"0.708","limit":"10.000"},"limitreport-memusage":{"value":7086171,"limit":52428800}},"cachereport":{"origin":"mw-api-ext.eqiad.main-df448499c-xb7wd","timestamp":"20250318164343","ttl":2592000,"transientcontent":false}}});});</script>
<!-- schema.org Article metadata as a JSON-LD data block. The content is strict JSON, so no comments may be placed inside it. -->
<script 
type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"GPT-2","url":"https:\/\/en.wikipedia.org\/wiki\/GPT-2","sameAs":"http:\/\/www.wikidata.org\/entity\/Q95726727","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q95726727","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2020-12-08T01:07:46Z","dateModified":"2025-03-05T19:22:47Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/a\/ad\/GPT2-talks-about-GPT2.png","headline":"2019 text-generating large language model"}</script> </body> </html>