DeepSeek - Wikipedia
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>History</span> </div> </a> <button aria-controls="toc-History-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle History subsection</span> </button> <ul id="toc-History-sublist" class="vector-toc-list"> <li id="toc-Founding_and_early_years_(2016–2023)" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Founding_and_early_years_(2016–2023)"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.1</span> <span>Founding and early years (2016–2023)</span> </div> </a> <ul id="toc-Founding_and_early_years_(2016–2023)-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Model_releases_(2023–present)" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Model_releases_(2023–present)"> <div 
class="vector-toc-text"> <span class="vector-toc-numb">1.2</span> <span>Model releases (2023–present)</span> </div> </a> <ul id="toc-Model_releases_(2023–present)-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Company_operation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Company_operation"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Company operation</span> </div> </a> <button aria-controls="toc-Company_operation-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Company operation subsection</span> </button> <ul id="toc-Company_operation-sublist" class="vector-toc-list"> <li id="toc-Strategy" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Strategy"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Strategy</span> </div> </a> <ul id="toc-Strategy-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Training_framework" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Training_framework"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Training framework</span> </div> </a> <ul id="toc-Training_framework-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Development_and_release_history" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Development_and_release_history"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Development and release history</span> </div> </a> <ul id="toc-Development_and_release_history-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Overview_of_models" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Overview_of_models"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Overview of models</span> </div> </a> <button aria-controls="toc-Overview_of_models-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Overview of models subsection</span> </button> <ul id="toc-Overview_of_models-sublist" class="vector-toc-list"> <li id="toc-DeepSeek_Coder" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#DeepSeek_Coder"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.1</span> <span>DeepSeek Coder</span> </div> </a> <ul id="toc-DeepSeek_Coder-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-DeepSeek-LLM" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#DeepSeek-LLM"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.2</span> <span>DeepSeek-LLM</span> </div> </a> <ul id="toc-DeepSeek-LLM-sublist" class="vector-toc-list"> <li id="toc-MoE" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#MoE"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.2.1</span> <span>MoE</span> </div> </a> <ul id="toc-MoE-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Math" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Math"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.2.2</span> 
<span>Math</span> </div> </a> <ul id="toc-Math-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-V2" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#V2"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.3</span> <span>V2</span> </div> </a> <ul id="toc-V2-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-V3" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#V3"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.4</span> <span>V3</span> </div> </a> <ul id="toc-V3-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-R1" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#R1"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.5</span> <span>R1</span> </div> </a> <ul id="toc-R1-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Significance" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Significance"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>Significance</span> </div> </a> <ul id="toc-Significance-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Notes" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Notes"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>Notes</span> </div> </a> <ul id="toc-Notes-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">10</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" title="Table of Contents" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div 
class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">DeepSeek</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 68 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-68" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">68 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-af mw-list-item"><a href="https://af.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Afrikaans" lang="af" hreflang="af" data-title="DeepSeek" data-language-autonym="Afrikaans" data-language-local-name="Afrikaans" class="interlanguage-link-target"><span>Afrikaans</span></a></li><li class="interlanguage-link interwiki-ar mw-list-item"><a href="https://ar.wikipedia.org/wiki/%D8%AF%D9%8A%D8%A8_%D8%B3%D9%8A%D9%83" title="ديب سيك – Arabic" lang="ar" hreflang="ar" data-title="ديب سيك" data-language-autonym="العربية" data-language-local-name="Arabic" class="interlanguage-link-target"><span>العربية</span></a></li><li class="interlanguage-link interwiki-an mw-list-item"><a href="https://an.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Aragonese" lang="an" hreflang="an" data-title="DeepSeek" data-language-autonym="Aragonés" data-language-local-name="Aragonese" class="interlanguage-link-target"><span>Aragonés</span></a></li><li class="interlanguage-link interwiki-as mw-list-item"><a href="https://as.wikipedia.org/wiki/%E0%A6%A1%E0%A7%80%E0%A6%AA%E0%A6%9B%E0%A7%80%E0%A6%95" title="ডীপছীক – Assamese" lang="as" hreflang="as" data-title="ডীপছীক" data-language-autonym="অসমীয়া" data-language-local-name="Assamese" class="interlanguage-link-target"><span>অসমীয়া</span></a></li><li class="interlanguage-link interwiki-az mw-list-item"><a href="https://az.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Azerbaijani" lang="az" hreflang="az" data-title="DeepSeek" data-language-autonym="Azərbaycanca" data-language-local-name="Azerbaijani" class="interlanguage-link-target"><span>Azərbaycanca</span></a></li><li class="interlanguage-link interwiki-bn mw-list-item"><a href="https://bn.wikipedia.org/wiki/%E0%A6%A1%E0%A6%BF%E0%A6%AA%E0%A6%B8%E0%A6%BF%E0%A6%95" title="ডিপসিক – Bangla" lang="bn" hreflang="bn" data-title="ডিপসিক" data-language-autonym="বাংলা" data-language-local-name="Bangla" class="interlanguage-link-target"><span>বাংলা</span></a></li><li class="interlanguage-link interwiki-be mw-list-item"><a href="https://be.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Belarusian" lang="be" hreflang="be" data-title="DeepSeek" data-language-autonym="Беларуская" data-language-local-name="Belarusian" class="interlanguage-link-target"><span>Беларуская</span></a></li><li class="interlanguage-link interwiki-be-x-old mw-list-item"><a 
href="https://be-tarask.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Belarusian (Taraškievica orthography)" lang="be-tarask" hreflang="be-tarask" data-title="DeepSeek" data-language-autonym="Беларуская (тарашкевіца)" data-language-local-name="Belarusian (Taraškievica orthography)" class="interlanguage-link-target"><span>Беларуская (тарашкевіца)</span></a></li><li class="interlanguage-link interwiki-bg mw-list-item"><a href="https://bg.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Bulgarian" lang="bg" hreflang="bg" data-title="DeepSeek" data-language-autonym="Български" data-language-local-name="Bulgarian" class="interlanguage-link-target"><span>Български</span></a></li><li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Catalan" lang="ca" hreflang="ca" data-title="DeepSeek" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-cs mw-list-item"><a href="https://cs.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Czech" lang="cs" hreflang="cs" data-title="DeepSeek" data-language-autonym="Čeština" data-language-local-name="Czech" class="interlanguage-link-target"><span>Čeština</span></a></li><li class="interlanguage-link interwiki-da mw-list-item"><a href="https://da.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Danish" lang="da" hreflang="da" data-title="DeepSeek" data-language-autonym="Dansk" data-language-local-name="Danish" class="interlanguage-link-target"><span>Dansk</span></a></li><li class="interlanguage-link interwiki-ary mw-list-item"><a href="https://ary.wikipedia.org/wiki/%D8%AF%D9%8A%D9%BE_%D8%B3%D9%8A%D9%83" title="ديپ سيك – Moroccan Arabic" lang="ary" hreflang="ary" data-title="ديپ سيك" data-language-autonym="الدارجة" data-language-local-name="Moroccan Arabic" class="interlanguage-link-target"><span>الدارجة</span></a></li><li class="interlanguage-link interwiki-de mw-list-item"><a href="https://de.wikipedia.org/wiki/DeepSeek" title="DeepSeek – German" lang="de" hreflang="de" data-title="DeepSeek" data-language-autonym="Deutsch" data-language-local-name="German" class="interlanguage-link-target"><span>Deutsch</span></a></li><li class="interlanguage-link interwiki-el mw-list-item"><a href="https://el.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Greek" lang="el" hreflang="el" data-title="DeepSeek" data-language-autonym="Ελληνικά" data-language-local-name="Greek" class="interlanguage-link-target"><span>Ελληνικά</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Spanish" lang="es" hreflang="es" data-title="DeepSeek" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-eo mw-list-item"><a href="https://eo.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Esperanto" lang="eo" hreflang="eo" data-title="DeepSeek" data-language-autonym="Esperanto" data-language-local-name="Esperanto" class="interlanguage-link-target"><span>Esperanto</span></a></li><li class="interlanguage-link interwiki-eu mw-list-item"><a href="https://eu.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Basque" lang="eu" hreflang="eu" data-title="DeepSeek" data-language-autonym="Euskara" data-language-local-name="Basque" class="interlanguage-link-target"><span>Euskara</span></a></li><li class="interlanguage-link interwiki-fa 
mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%AF%DB%8C%D9%BE%E2%80%8C%D8%B3%DB%8C%DA%A9" title="دیپسیک – Persian" lang="fa" hreflang="fa" data-title="دیپسیک" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/DeepSeek" title="DeepSeek – French" lang="fr" hreflang="fr" data-title="DeepSeek" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-fy mw-list-item"><a href="https://fy.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Western Frisian" lang="fy" hreflang="fy" data-title="DeepSeek" data-language-autonym="Frysk" data-language-local-name="Western Frisian" class="interlanguage-link-target"><span>Frysk</span></a></li><li class="interlanguage-link interwiki-ff mw-list-item"><a href="https://ff.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Fula" lang="ff" hreflang="ff" data-title="DeepSeek" data-language-autonym="Fulfulde" data-language-local-name="Fula" class="interlanguage-link-target"><span>Fulfulde</span></a></li><li class="interlanguage-link interwiki-ga mw-list-item"><a href="https://ga.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Irish" lang="ga" hreflang="ga" data-title="DeepSeek" data-language-autonym="Gaeilge" data-language-local-name="Irish" class="interlanguage-link-target"><span>Gaeilge</span></a></li><li class="interlanguage-link interwiki-gl mw-list-item"><a href="https://gl.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Galician" lang="gl" hreflang="gl" data-title="DeepSeek" data-language-autonym="Galego" data-language-local-name="Galician" class="interlanguage-link-target"><span>Galego</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EB%94%A5%EC%8B%9C%ED%81%AC" title="딥시크 – Korean" lang="ko" hreflang="ko" data-title="딥시크" data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-hi mw-list-item"><a href="https://hi.wikipedia.org/wiki/%E0%A4%A1%E0%A5%80%E0%A4%AA%E0%A4%B8%E0%A5%80%E0%A4%95" title="डीपसीक – Hindi" lang="hi" hreflang="hi" data-title="डीपसीक" data-language-autonym="हिन्दी" data-language-local-name="Hindi" class="interlanguage-link-target"><span>हिन्दी</span></a></li><li class="interlanguage-link interwiki-hr mw-list-item"><a href="https://hr.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Croatian" lang="hr" hreflang="hr" data-title="DeepSeek" data-language-autonym="Hrvatski" data-language-local-name="Croatian" class="interlanguage-link-target"><span>Hrvatski</span></a></li><li class="interlanguage-link interwiki-io mw-list-item"><a href="https://io.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Ido" lang="io" hreflang="io" data-title="DeepSeek" data-language-autonym="Ido" data-language-local-name="Ido" class="interlanguage-link-target"><span>Ido</span></a></li><li class="interlanguage-link interwiki-id mw-list-item"><a href="https://id.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Indonesian" lang="id" hreflang="id" data-title="DeepSeek" data-language-autonym="Bahasa Indonesia" data-language-local-name="Indonesian" class="interlanguage-link-target"><span>Bahasa Indonesia</span></a></li><li class="interlanguage-link interwiki-it mw-list-item"><a href="https://it.wikipedia.org/wiki/DeepSeek" 
title="DeepSeek – Italian" lang="it" hreflang="it" data-title="DeepSeek" data-language-autonym="Italiano" data-language-local-name="Italian" class="interlanguage-link-target"><span>Italiano</span></a></li><li class="interlanguage-link interwiki-he mw-list-item"><a href="https://he.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Hebrew" lang="he" hreflang="he" data-title="DeepSeek" data-language-autonym="עברית" data-language-local-name="Hebrew" class="interlanguage-link-target"><span>עברית</span></a></li><li class="interlanguage-link interwiki-sw mw-list-item"><a href="https://sw.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Swahili" lang="sw" hreflang="sw" data-title="DeepSeek" data-language-autonym="Kiswahili" data-language-local-name="Swahili" class="interlanguage-link-target"><span>Kiswahili</span></a></li><li class="interlanguage-link interwiki-lv mw-list-item"><a href="https://lv.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Latvian" lang="lv" hreflang="lv" data-title="DeepSeek" data-language-autonym="Latviešu" data-language-local-name="Latvian" class="interlanguage-link-target"><span>Latviešu</span></a></li><li class="interlanguage-link interwiki-hu mw-list-item"><a href="https://hu.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Hungarian" lang="hu" hreflang="hu" data-title="DeepSeek" data-language-autonym="Magyar" data-language-local-name="Hungarian" class="interlanguage-link-target"><span>Magyar</span></a></li><li class="interlanguage-link interwiki-mk mw-list-item"><a href="https://mk.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Macedonian" lang="mk" hreflang="mk" data-title="DeepSeek" data-language-autonym="Македонски" data-language-local-name="Macedonian" class="interlanguage-link-target"><span>Македонски</span></a></li><li class="interlanguage-link interwiki-ml mw-list-item"><a href="https://ml.wikipedia.org/wiki/%E0%B4%A1%E0%B5%80%E0%B4%AA%E0%B5%8D%E0%B4%B8%E0%B5%80%E0%B4%95%E0%B5%8D%E0%B4%95%E0%B5%8D" title="ഡീപ്സീക്ക് – Malayalam" lang="ml" hreflang="ml" data-title="ഡീപ്സീക്ക്" data-language-autonym="മലയാളം" data-language-local-name="Malayalam" class="interlanguage-link-target"><span>മലയാളം</span></a></li><li class="interlanguage-link interwiki-nl mw-list-item"><a href="https://nl.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Dutch" lang="nl" hreflang="nl" data-title="DeepSeek" data-language-autonym="Nederlands" data-language-local-name="Dutch" class="interlanguage-link-target"><span>Nederlands</span></a></li><li class="interlanguage-link interwiki-ne mw-list-item"><a href="https://ne.wikipedia.org/wiki/%E0%A4%A1%E0%A4%BF%E0%A4%AA%E0%A4%B8%E0%A4%BF%E0%A4%95" title="डिपसिक – Nepali" lang="ne" hreflang="ne" data-title="डिपसिक" data-language-autonym="नेपाली" data-language-local-name="Nepali" class="interlanguage-link-target"><span>नेपाली</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Japanese" lang="ja" hreflang="ja" data-title="DeepSeek" data-language-autonym="日本語" data-language-local-name="Japanese" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-uz mw-list-item"><a href="https://uz.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Uzbek" lang="uz" hreflang="uz" data-title="DeepSeek" data-language-autonym="Oʻzbekcha / ўзбекча" data-language-local-name="Uzbek" class="interlanguage-link-target"><span>Oʻzbekcha / ўзбекча</span></a></li><li class="interlanguage-link interwiki-ps mw-list-item"><a 
href="https://ps.wikipedia.org/wiki/%DA%89%D9%8A%D9%BE%D8%B3%D9%8A%DA%A9" title="ډيپسيک – Pashto" lang="ps" hreflang="ps" data-title="ډيپسيک" data-language-autonym="پښتو" data-language-local-name="Pashto" class="interlanguage-link-target"><span>پښتو</span></a></li><li class="interlanguage-link interwiki-pms mw-list-item"><a href="https://pms.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Piedmontese" lang="pms" hreflang="pms" data-title="DeepSeek" data-language-autonym="Piemontèis" data-language-local-name="Piedmontese" class="interlanguage-link-target"><span>Piemontèis</span></a></li><li class="interlanguage-link interwiki-pl mw-list-item"><a href="https://pl.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Polish" lang="pl" hreflang="pl" data-title="DeepSeek" data-language-autonym="Polski" data-language-local-name="Polish" class="interlanguage-link-target"><span>Polski</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Portuguese" lang="pt" hreflang="pt" data-title="DeepSeek" data-language-autonym="Português" data-language-local-name="Portuguese" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link interwiki-kaa mw-list-item"><a href="https://kaa.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Kara-Kalpak" lang="kaa" hreflang="kaa" data-title="DeepSeek" data-language-autonym="Qaraqalpaqsha" data-language-local-name="Kara-Kalpak" class="interlanguage-link-target"><span>Qaraqalpaqsha</span></a></li><li class="interlanguage-link interwiki-ro mw-list-item"><a href="https://ro.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Romanian" lang="ro" hreflang="ro" data-title="DeepSeek" data-language-autonym="Română" data-language-local-name="Romanian" class="interlanguage-link-target"><span>Română</span></a></li><li class="interlanguage-link interwiki-qu mw-list-item"><a href="https://qu.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Quechua" lang="qu" hreflang="qu" data-title="DeepSeek" data-language-autonym="Runa Simi" data-language-local-name="Quechua" class="interlanguage-link-target"><span>Runa Simi</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Russian" lang="ru" hreflang="ru" data-title="DeepSeek" data-language-autonym="Русский" data-language-local-name="Russian" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-simple mw-list-item"><a href="https://simple.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Simple English" lang="en-simple" hreflang="en-simple" data-title="DeepSeek" data-language-autonym="Simple English" data-language-local-name="Simple English" class="interlanguage-link-target"><span>Simple English</span></a></li><li class="interlanguage-link interwiki-sl mw-list-item"><a href="https://sl.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Slovenian" lang="sl" hreflang="sl" data-title="DeepSeek" data-language-autonym="Slovenščina" data-language-local-name="Slovenian" class="interlanguage-link-target"><span>Slovenščina</span></a></li><li class="interlanguage-link interwiki-ckb mw-list-item"><a href="https://ckb.wikipedia.org/wiki/%D8%AF%DB%8C%D9%BE%D8%B3%DB%8C%DA%A9" title="دیپسیک – Central Kurdish" lang="ckb" hreflang="ckb" data-title="دیپسیک" data-language-autonym="کوردی" data-language-local-name="Central Kurdish" class="interlanguage-link-target"><span>کوردی</span></a></li><li class="interlanguage-link interwiki-sr 
mw-list-item"><a href="https://sr.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Serbian" lang="sr" hreflang="sr" data-title="DeepSeek" data-language-autonym="Српски / srpski" data-language-local-name="Serbian" class="interlanguage-link-target"><span>Српски / srpski</span></a></li><li class="interlanguage-link interwiki-fi mw-list-item"><a href="https://fi.wikipedia.org/wiki/Deepseek" title="Deepseek – Finnish" lang="fi" hreflang="fi" data-title="Deepseek" data-language-autonym="Suomi" data-language-local-name="Finnish" class="interlanguage-link-target"><span>Suomi</span></a></li><li class="interlanguage-link interwiki-sv mw-list-item"><a href="https://sv.wikipedia.org/wiki/Deepseek" title="Deepseek – Swedish" lang="sv" hreflang="sv" data-title="Deepseek" data-language-autonym="Svenska" data-language-local-name="Swedish" class="interlanguage-link-target"><span>Svenska</span></a></li><li class="interlanguage-link interwiki-tl mw-list-item"><a href="https://tl.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Tagalog" lang="tl" hreflang="tl" data-title="DeepSeek" data-language-autonym="Tagalog" data-language-local-name="Tagalog" class="interlanguage-link-target"><span>Tagalog</span></a></li><li class="interlanguage-link interwiki-ta mw-list-item"><a href="https://ta.wikipedia.org/wiki/%E0%AE%9F%E0%AF%80%E0%AE%AA%E0%AF%8D%E0%AE%9A%E0%AF%80%E0%AE%95%E0%AF%8D" title="டீப்சீக் – Tamil" lang="ta" hreflang="ta" data-title="டீப்சீக்" data-language-autonym="தமிழ்" data-language-local-name="Tamil" class="interlanguage-link-target"><span>தமிழ்</span></a></li><li class="interlanguage-link interwiki-shn mw-list-item"><a href="https://shn.wikipedia.org/wiki/%E1%80%90%E1%80%AD%E1%80%95%E1%80%BA%E1%82%89%E1%80%9E%E1%80%AD%E1%81%B5%E1%80%BA%E1%82%89%E1%81%B6%E1%80%BA" title="တိပ်ႉသိၵ်ႉၶ် – Shan" lang="shn" hreflang="shn" data-title="တိပ်ႉသိၵ်ႉၶ်" data-language-autonym="တႆး" data-language-local-name="Shan" class="interlanguage-link-target"><span>တႆး</span></a></li><li class="interlanguage-link interwiki-te mw-list-item"><a href="https://te.wikipedia.org/wiki/%E0%B0%A1%E0%B1%80%E0%B0%AA%E0%B1%8D_%E0%B0%B8%E0%B1%80%E0%B0%95%E0%B1%8D" title="డీప్ సీక్ – Telugu" lang="te" hreflang="te" data-title="డీప్ సీక్" data-language-autonym="తెలుగు" data-language-local-name="Telugu" class="interlanguage-link-target"><span>తెలుగు</span></a></li><li class="interlanguage-link interwiki-th mw-list-item"><a href="https://th.wikipedia.org/wiki/%E0%B8%94%E0%B8%B5%E0%B8%9B%E0%B8%8B%E0%B8%B5%E0%B8%81" title="ดีปซีก – Thai" lang="th" hreflang="th" data-title="ดีปซีก" data-language-autonym="ไทย" data-language-local-name="Thai" class="interlanguage-link-target"><span>ไทย</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Turkish" lang="tr" hreflang="tr" data-title="DeepSeek" data-language-autonym="Türkçe" data-language-local-name="Turkish" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Ukrainian" lang="uk" hreflang="uk" data-title="DeepSeek" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-ur mw-list-item"><a href="https://ur.wikipedia.org/wiki/%DA%88%DB%8C%D9%BE_%D8%B3%DB%8C%DA%A9" title="ڈیپ سیک – Urdu" lang="ur" hreflang="ur" data-title="ڈیپ سیک" data-language-autonym="اردو" 
data-language-local-name="Urdu" class="interlanguage-link-target"><span>اردو</span></a></li><li class="interlanguage-link interwiki-ug mw-list-item"><a href="https://ug.wikipedia.org/wiki/%DA%86%D9%88%DA%AD%D9%82%DB%87%D8%B1_%D9%82%DB%90%D8%AF%D9%89%D8%B1_(%D8%B3%DB%88%D9%86%D8%A6%D9%89%D9%8A_%D8%A6%DB%95%D9%82%D9%89%D9%84)" title="چوڭقۇر قېدىر (سۈنئىي ئەقىل) – Uyghur" lang="ug" hreflang="ug" data-title="چوڭقۇر قېدىر (سۈنئىي ئەقىل)" data-language-autonym="ئۇيغۇرچە / Uyghurche" data-language-local-name="Uyghur" class="interlanguage-link-target"><span>ئۇيغۇرچە / Uyghurche</span></a></li><li class="interlanguage-link interwiki-vi mw-list-item"><a href="https://vi.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Vietnamese" lang="vi" hreflang="vi" data-title="DeepSeek" data-language-autonym="Tiếng Việt" data-language-local-name="Vietnamese" class="interlanguage-link-target"><span>Tiếng Việt</span></a></li><li class="interlanguage-link interwiki-wuu mw-list-item"><a href="https://wuu.wikipedia.org/wiki/%E6%B7%B1%E5%BA%A6%E6%B1%82%E7%B4%A2" title="深度求索 – Wu" lang="wuu" hreflang="wuu" data-title="深度求索" data-language-autonym="吴语" data-language-local-name="Wu" class="interlanguage-link-target"><span>吴语</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/DeepSeek" title="DeepSeek – Cantonese" lang="yue" hreflang="yue" data-title="DeepSeek" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E6%B7%B1%E5%BA%A6%E6%B1%82%E7%B4%A2" title="深度求索 – Chinese" lang="zh" hreflang="zh" data-title="深度求索" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li><li class="interlanguage-link interwiki-syl mw-list-item"><a href="https://syl.wikipedia.org/wiki/%EA%A0%92%EA%A0%A4%EA%A0%99%EA%A0%8D%EA%A0%A4%EA%A0%87" title="ꠒꠤꠙꠍꠤꠇ – Sylheti" lang="syl" hreflang="syl" data-title="ꠒꠤꠙꠍꠤꠇ" data-language-autonym="ꠍꠤꠟꠐꠤ" data-language-local-name="Sylheti" class="interlanguage-link-target"><span>ꠍꠤꠟꠐꠤ</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q131577453#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/DeepSeek" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:DeepSeek" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label 
id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/DeepSeek"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=DeepSeek&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=DeepSeek&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/DeepSeek"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=DeepSeek&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a 
href="/w/index.php?title=DeepSeek&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/DeepSeek" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/DeepSeek" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=DeepSeek&oldid=1281555935" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=DeepSeek&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=DeepSeek&id=1281555935&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FDeepSeek"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FDeepSeek"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=DeepSeek&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=DeepSeek&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="wb-otherproject-link wb-otherproject-commons mw-list-item"><a href="https://commons.wikimedia.org/wiki/Category:DeepSeek" hreflang="en"><span>Wikimedia Commons</span></a></li><li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q131577453" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav 
class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Chinese artificial intelligence company</div> <style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">This article is about the company. 
For the chatbot, see <a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek (chatbot)</a>.</div> <p class="mw-empty-elt"> </p> <style data-mw-deduplicate="TemplateStyles:r1257001546">.mw-parser-output .infobox-subbox{padding:0;border:none;margin:-3px;width:auto;min-width:100%;font-size:100%;clear:none;float:none;background-color:transparent}.mw-parser-output .infobox-3cols-child{margin:auto}.mw-parser-output .infobox .navbar{font-size:100%}@media screen{html.skin-theme-clientpref-night .mw-parser-output .infobox-full-data:not(.notheme)>div:not(.notheme)[style]{background:#1f1f23!important;color:#f8f9fa}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .infobox-full-data:not(.notheme) div:not(.notheme){background:#1f1f23!important;color:#f8f9fa}}@media(min-width:640px){body.skin--responsive .mw-parser-output .infobox-table{display:table!important}body.skin--responsive .mw-parser-output .infobox-table>caption{display:table-caption!important}body.skin--responsive .mw-parser-output .infobox-table>tbody{display:table-row-group}body.skin--responsive .mw-parser-output .infobox-table tr{display:table-row!important}body.skin--responsive .mw-parser-output .infobox-table th,body.skin--responsive .mw-parser-output .infobox-table td{padding-left:inherit;padding-right:inherit}}</style><style data-mw-deduplicate="TemplateStyles:r1242257876">.mw-parser-output .ib-company .infobox-label{padding-right:0.5em}.mw-parser-output .ib-company .infobox-data,.mw-parser-output .ib-company .infobox-below{line-height:1.35em}.mw-parser-output .ib-company-logo img{background-color:#f8f9fa}.mw-parser-output .ib-company-locality,.mw-parser-output .ib-company-country{display:inline}</style><table class="infobox ib-company vcard"><caption class="infobox-title fn org" style="font-size: 125%;">Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd.</caption><tbody><tr><td colspan="2" class="infobox-image ib-company-logo logo"><span class="mw-default-size" typeof="mw:File/Frameless"><a href="/wiki/File:DeepSeek_logo.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/e/ec/DeepSeek_logo.svg/330px-DeepSeek_logo.svg.png" decoding="async" width="280" height="60" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/e/ec/DeepSeek_logo.svg/500px-DeepSeek_logo.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/e/ec/DeepSeek_logo.svg/560px-DeepSeek_logo.svg.png 2x" data-file-width="512" data-file-height="109" /></a></span></td></tr><tr><th scope="row" class="infobox-label"><div style="display: inline-block; line-height: 1.2em; padding: .1em 0;">Native name</div></th><td class="infobox-data"><span title="Chinese-language text"><span lang="zh-CN">杭州深度求索人工智能基础技术研究有限公司</span></span></td></tr><tr><th scope="row" class="infobox-label">Company type</th><td class="infobox-data category"><a href="/wiki/Privately_held_company" title="Privately held company">Private</a></td></tr><tr><th scope="row" class="infobox-label">Industry</th><td class="infobox-data category"><a href="/wiki/Information_technology" title="Information technology">Information technology</a><br /><a href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence</a></td></tr><tr><th scope="row" class="infobox-label">Founded</th><td class="infobox-data">17 July 2023<span class="noprint">; 20 months ago</span><span style="display:none"> (<span class="bday dtstart published 
updated">2023-07-17</span>)</span><sup id="cite_ref-DeepSeek突传消息!_1-0" class="reference"><a href="#cite_note-DeepSeek突传消息!-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup></td></tr><tr><th scope="row" class="infobox-label">Founder</th><td class="infobox-data agent"><style data-mw-deduplicate="TemplateStyles:r1126788409">.mw-parser-output .plainlist ol,.mw-parser-output .plainlist ul{line-height:inherit;list-style:none;margin:0;padding:0}.mw-parser-output .plainlist ol li,.mw-parser-output .plainlist ul li{margin-bottom:0}</style><div class="plainlist"><ul><li><a href="/wiki/Liang_Wenfeng" title="Liang Wenfeng">Liang Wenfeng</a></li></ul></div></td></tr><tr><th scope="row" class="infobox-label">Headquarters</th><td class="infobox-data label"><a href="/wiki/Hangzhou" title="Hangzhou">Hangzhou</a>, <a href="/wiki/Zhejiang" title="Zhejiang">Zhejiang</a>, China</td></tr><tr><th scope="row" class="infobox-label"><div style="display: inline-block; line-height: 1.2em; padding: .1em 0;">Key people</div></th><td class="infobox-data agent"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1126788409" /><div class="plainlist"><ul><li>Liang Wenfeng (CEO)</li></ul></div></td></tr><tr><th scope="row" class="infobox-label">Owner</th><td class="infobox-data"><a href="/wiki/High-Flyer" title="High-Flyer">High-Flyer</a></td></tr><tr><th scope="row" class="infobox-label"><div style="display: inline-block; line-height: 1.2em; padding: .1em 0;">Number of employees</div></th><td class="infobox-data">160 (2025)<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup></td></tr><tr><th scope="row" class="infobox-label">Website</th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://www.deepseek.com">www<wbr />.deepseek<wbr />.com</a></span> <span class="mw-valign-text-top noprint" typeof="mw:File/Frameless"><a href="https://www.wikidata.org/wiki/Q131577453#P856" title="Edit this at Wikidata"><img alt="Edit this at Wikidata" src="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png" decoding="async" width="10" height="10" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/15px-OOjs_UI_icon_edit-ltr-progressive.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/20px-OOjs_UI_icon_edit-ltr-progressive.svg.png 2x" data-file-width="20" data-file-height="20" /></a></span></td></tr></tbody></table> <p><b>Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd.</b>,<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">[</span>a<span class="cite-bracket">]</span></a></sup> <a href="/wiki/Trade_name" title="Trade name">doing business as</a> <b>DeepSeek</b>,<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">[</span>b<span 
class="cite-bracket">]</span></a></sup> is a Chinese <a href="/wiki/Artificial_intelligence" title="Artificial intelligence">artificial intelligence</a> company that develops <a href="/wiki/Large_language_model" title="Large language model">large language models</a> (LLMs). Based in <a href="/wiki/Hangzhou,_Zhejiang" class="mw-redirect" title="Hangzhou, Zhejiang">Hangzhou, Zhejiang</a>, it is owned and funded by the Chinese hedge fund <a href="/wiki/High-Flyer" title="High-Flyer">High-Flyer</a>. DeepSeek was founded in July 2023 by <a href="/wiki/Liang_Wenfeng" title="Liang Wenfeng">Liang Wenfeng</a>, the co-founder of High-Flyer, who also serves as the <a href="/wiki/Chief_executive_officer" title="Chief executive officer">CEO</a> for both companies.<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup> The company launched <a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">an eponymous chatbot</a> alongside its DeepSeek-R1 model in January 2025. </p><p>Released under the <a href="/wiki/MIT_License" title="MIT License">MIT License</a>, DeepSeek-R1 provides responses comparable to other contemporary large language models, such as <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a>'s <a href="/wiki/GPT-4o" title="GPT-4o">GPT-4o</a> and <a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a>.<sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup> Its training cost is reported to be significantly lower than other LLMs. 
DeepSeek's models are "open weight", which provides less freedom for modification than true open-source software.[17][18] The company reportedly recruits AI researchers from top Chinese universities[15] and hires from outside the computer science field to diversify its models' knowledge and abilities.[12]

The DeepSeek-R1 model was trained at a significantly lower cost than other models by using techniques such as mixture of experts to reduce costs.[19] The model was also trained during ongoing trade restrictions on AI chip exports to China, so it was trained on weaker AI chips made for export to China[13] and on fewer chips than comparable models.[15] This reduction in expenses, achieved while maintaining the model's performance and quality, sent "shockwaves" through the market. It threatened the dominance of AI leaders like Nvidia and contributed to the largest drop for a single company in US stock market history, as Nvidia lost $600 billion in market value.[20][21]

History

Founding and early years (2016–2023)

In February 2016, High-Flyer was co-founded by AI enthusiast Liang Wenfeng, who had been trading since the 2007–2008 financial crisis while attending Zhejiang University.[22] The company began stock trading using a GPU-dependent deep learning model on 21 October 2016. Prior to this, it used CPU-based models, mainly linear models. Most trading was driven by AI by the end of 2017.[23]

In 2019, Liang established High-Flyer as a hedge fund focused on developing and using AI trading algorithms. By 2021, High-Flyer exclusively used AI in trading,[24] often using Nvidia chips.[25]

The initial computing cluster, Fire-Flyer, began construction in 2019 and was finished in 2020, at a cost of 200 million yuan. It contained 1,100 GPUs interconnected at a rate of 200 Gbit/s. It was "retired" after 1.5 years in operation.[23]
In 2021, Liang began stockpiling Nvidia GPUs for an AI project.[25] According to 36Kr, Liang acquired 10,000 Nvidia A100 GPUs[26] before the United States restricted chip sales to China.[24] The computing cluster Fire-Flyer 2 began construction in 2021 with a budget of 1 billion yuan.[23]

It was reported that in 2022, Fire-Flyer 2's capacity had been used at over 96%, totaling 56.74 million GPU hours; 27% of that capacity was used to support scientific computing outside the company.[23]

During 2022, Fire-Flyer 2 had 5,000 PCIe A100 GPUs in 625 nodes, each containing 8 GPUs. At the time, it exclusively used the PCIe rather than the DGX version of the A100, since the models it trained could fit within a single GPU's 40 GB of VRAM, so there was no need for the higher bandwidth of DGX (i.e., training required only data parallelism, not model parallelism).[27] Later, it incorporated NVLink and NCCL to train larger models that required model parallelism.[28][29]

On 14 April 2023,[30] High-Flyer announced the start of an artificial general intelligence lab dedicated to developing AI tools, separate from High-Flyer's financial business.[31][32] Incorporated on 17 July 2023,[1] with High-Flyer as the investor and backer, the lab became its own company, DeepSeek.[24][33][32] Venture capital firms were reluctant to provide funding, as they considered it unlikely that the venture would be able to quickly generate an "exit".[24]

On 16 May 2023, the company Beijing DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd. was incorporated. It was later taken under 100% control of Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd., which was incorporated two months later.[citation needed]
Model releases (2023–present)

On 2 November 2023, DeepSeek released its first model, DeepSeek Coder. On 29 November 2023, DeepSeek released the DeepSeek-LLM series of models.[34]: section 5  On 9 January 2024, it released two DeepSeek-MoE models (Base and Chat),[35] and in April three DeepSeek-Math models (Base, Instruct, and RL).[36]

DeepSeek-V2 was released in May 2024, followed the next month by the DeepSeek-Coder V2 series.[37] DeepSeek-V2.5 was released in September and updated in December.[38] On 20 November, DeepSeek-R1-Lite-Preview became accessible via API and chat.[39][40] In December, the company released the base model DeepSeek-V3-Base and the chat model DeepSeek-V3.[28]

[Figure: The DeepSeek login page shortly after a cyberattack that occurred following its 20 January launch]

On 20 January 2025, DeepSeek released the DeepSeek chatbot, based on the DeepSeek-R1 model, free of charge for iOS and Android. By 27 January, DeepSeek had surpassed ChatGPT as the most downloaded freeware app on the iOS App Store in the United States,[15] causing Nvidia's share price to drop by 18%.[41][42]

Company operation

Based in Hangzhou, Zhejiang, DeepSeek is owned and funded by Liang Wenfeng, co-founder of the Chinese hedge fund High-Flyer, who also serves as its CEO. As of May 2024, Liang owned 84% of DeepSeek through two shell corporations.[note 1][43]

Strategy

DeepSeek is focused on research and has no detailed plans for commercialization.[44] This strategy allows its technology to avoid the most stringent provisions of China's AI regulations, such as the requirement that consumer-facing technology comply with government controls on information.[12]

DeepSeek's hiring preferences target technical abilities rather than work experience; most new hires are either recent university graduates or developers whose AI careers are less established.[32][12] Likewise, the company recruits individuals without a computer science background to help its technologists understand other knowledge areas,[15] such as poetry and China's notoriously difficult college admissions exam, the Gaokao.[12]
id="cite_ref-scmp_1_January_2025_34-2" class="reference"><a href="#cite_note-scmp_1_January_2025-34"><span class="cite-bracket">[</span>32<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-Metz-2025a_14-3" class="reference"><a href="#cite_note-Metz-2025a-14"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> Likewise, the company recruits individuals without computer science background to help its technologists understand more knowledge areas,<sup id="cite_ref-Metz-2025b_17-4" class="reference"><a href="#cite_note-Metz-2025b-17"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup> such as poetry and China's notoriously difficult <a href="/wiki/Gaokao" title="Gaokao">college admissions exams (Gaokao)</a>.<sup id="cite_ref-Metz-2025a_14-4" class="reference"><a href="#cite_note-Metz-2025a-14"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Training_framework">Training framework</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=6" title="Edit section: Training framework"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>High-Flyer/DeepSeek operates at least two computing clusters, Fire-Flyer (萤火一号) and Fire-Flyer 2 (萤火二号). Fire-Flyer 2 consists of co-designed software and hardware architecture. On the hardware side, Nvidia GPUs use 200 <a href="/wiki/Data-rate_units" title="Data-rate units">Gbps</a> interconnects. The cluster is divided into two "zones", and the platform supports cross-zone tasks. The network topology was two <a href="/wiki/Fat_tree" title="Fat tree">fat trees</a>, chosen for high <a href="/wiki/Bisection_bandwidth" title="Bisection bandwidth">bisection bandwidth</a>. On the software side are:<sup id="cite_ref-DL_31-1" class="reference"><a href="#cite_note-DL-31"><span class="cite-bracket">[</span>29<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-HI_25-4" class="reference"><a href="#cite_note-HI-25"><span class="cite-bracket">[</span>23<span class="cite-bracket">]</span></a></sup> </p> <ul><li><code>3FS</code> (Fire-Flyer File System): A <a href="/wiki/Clustered_file_system" title="Clustered file system">distributed parallel file system</a>, specifically designed for asynchronous random reads. It uses Direct I/O and <a href="/wiki/Remote_direct_memory_access" title="Remote direct memory access">RDMA Read</a>. In contrast to standard Buffered I/O, Direct I/O does not cache data. Caching is useless for this case, since each data read is random and is not reused.<sup id="cite_ref-48" class="reference"><a href="#cite_note-48"><span class="cite-bracket">[</span>45<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-49" class="reference"><a href="#cite_note-49"><span class="cite-bracket">[</span>46<span class="cite-bracket">]</span></a></sup></li> <li><code>hfreduce</code>: Library for asynchronous communication, originally designed to replace Nvidia Collective Communication Library (NCCL).<sup id="cite_ref-RD_29-1" class="reference"><a href="#cite_note-RD-29"><span class="cite-bracket">[</span>27<span class="cite-bracket">]</span></a></sup> It is mainly used for <a href="/wiki/Allreduce" class="mw-redirect" title="Allreduce">allreduce</a>, especially of gradients during <a href="/wiki/Backpropagation" title="Backpropagation">backpropagation</a>. 
It is asynchronously run on the CPU to avoid blocking <a href="/wiki/Compute_kernel" title="Compute kernel">kernels</a> on the GPU.<sup id="cite_ref-DL_31-2" class="reference"><a href="#cite_note-DL-31"><span class="cite-bracket">[</span>29<span class="cite-bracket">]</span></a></sup> It uses <a href="/wiki/Two-tree_broadcast" title="Two-tree broadcast">two-tree broadcast</a> like NCCL.<sup id="cite_ref-RD_29-2" class="reference"><a href="#cite_note-RD-29"><span class="cite-bracket">[</span>27<span class="cite-bracket">]</span></a></sup></li> <li><code>hfai.nn</code>: Software library of commonly used operators for neural network training, similar to <code>torch.nn</code> in <a href="/wiki/PyTorch" title="PyTorch">PyTorch</a>.</li> <li><code>HaiScale Distributed Data Parallel</code> (DDP): Parallel training library that implements various forms of parallelism such as <a href="/wiki/Data_parallelism" title="Data parallelism">Data Parallelism</a> (DP), <a href="/wiki/Pipeline_(computing)" title="Pipeline (computing)">Pipeline Parallelism</a> (PP), Tensor Parallelism (TP), Experts Parallelism (EP), Fully Sharded Data Parallel (FSDP) and Zero Redundancy Optimizer (ZeRO). It is similar to PyTorch DDP, which uses NCCL on the backend.</li> <li><code>HAI Platform</code>: Various applications such as task scheduling, fault handling, and disaster recovery.<sup id="cite_ref-50" class="reference"><a href="#cite_note-50"><span class="cite-bracket">[</span>47<span class="cite-bracket">]</span></a></sup></li></ul> <p>As of 2022, Fire-Flyer 2 had 5000 <a href="/wiki/PCI_Express" title="PCI Express">PCIe</a> A100 GPUs in 625 nodes, each containing 8 GPUs.<sup id="cite_ref-RD_29-3" class="reference"><a href="#cite_note-RD-29"><span class="cite-bracket">[</span>27<span class="cite-bracket">]</span></a></sup> It later incorporated NVLinks and NCCL to train larger models that required model parallelism.<sup id="cite_ref-Deng,_Chengqi-2024_30-2" class="reference"><a href="#cite_note-Deng,_Chengqi-2024-30"><span class="cite-bracket">[</span>28<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-DL_31-3" class="reference"><a href="#cite_note-DL-31"><span class="cite-bracket">[</span>29<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Development_and_release_history">Development and release history</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=7" title="Edit section: Development and release history"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable"> <caption>Major versions of DeepSeek models. SFT stands for supervised finetuning. </caption> <tbody><tr> <th>Major versions </th> <th>Release date </th> <th>Major variants </th> <th>Remarks </th></tr> <tr> <td>DeepSeek Coder </td> <td>2 Nov 2023 </td> <td>Base (pretrained); Instruct (with instruction-finetuned) </td> <td rowspan="2">The architecture is essentially the same as Llama. </td></tr> <tr> <td>DeepSeek-LLM </td> <td>29 Nov 2023 </td> <td>Base; <p>Chat (with SFT) </p> </td></tr> <tr> <td>DeepSeek-MoE </td> <td>9 Jan 2024 </td> <td>Base; <p>Chat </p> </td> <td>Developed a variant of <a href="/wiki/Mixture_of_experts" title="Mixture of experts">mixture of experts</a> (MoE). 
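The gradient-allreduce pattern attributed to hfreduce above can be illustrated with the standard torch.distributed API. The following is a minimal sketch, not DeepSeek's code: it registers a hook on each parameter so its gradient is summed across data-parallel workers as soon as backpropagation produces it, and the outstanding reductions are awaited before the optimizer step.

```python
# Minimal sketch (not DeepSeek's hfreduce) of asynchronous gradient allreduce in
# pure data parallelism, using the standard torch.distributed API. Assumes the
# process group is already initialized (e.g. via torchrun) and that every rank
# holds an identical replica of `model`.
import torch
import torch.distributed as dist

def attach_async_allreduce(model: torch.nn.Module):
    """Start an allreduce for each gradient as soon as backprop produces it,
    instead of waiting for the whole backward pass to finish."""
    pending = []

    def hook(grad: torch.Tensor) -> torch.Tensor:
        work = dist.all_reduce(grad, op=dist.ReduceOp.SUM, async_op=True)
        pending.append((work, grad))
        return grad

    for p in model.parameters():
        if p.requires_grad:
            p.register_hook(hook)
    return pending

def finish_allreduce(pending):
    """Wait for outstanding reductions, then average by the number of workers."""
    world = dist.get_world_size()
    for work, grad in pending:
        work.wait()
        grad.div_(world)
    pending.clear()

# Typical step: loss.backward() triggers the hooks; finish_allreduce(pending) is
# called before optimizer.step() so all ranks apply the same averaged update.
```

hfreduce is reported to perform the reduction on the CPU so that GPU kernels are never blocked; the sketch above instead relies on the communication backend's own asynchrony, which is the closest standard equivalent.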
</td></tr> <tr> <td rowspan="3">DeepSeek-Math </td> <td rowspan="3">Apr 2024 </td> <td>Base </td> <td>Initialized with DS-Coder-Base-v1.5 </td></tr> <tr> <td>Instruct (with SFT) </td> <td> </td></tr> <tr> <td>RL (using a process reward model) </td> <td>Developed <a href="/wiki/Group_Relative_Policy_Optimization" class="mw-redirect" title="Group Relative Policy Optimization">Group Relative Policy Optimization (GRPO)</a>, a variant of <a href="/wiki/Proximal_Policy_Optimization" class="mw-redirect" title="Proximal Policy Optimization">Proximal Policy Optimization</a> (PPO). </td></tr> <tr> <td>DeepSeek V2 </td> <td>May 2024 </td> <td>DeepSeek-V2, DeepSeek-V2-Chat <p>DeepSeek-V2-Lite, DeepSeek-V2-Lite-Chat </p><p>DeepSeek-Coder-V2 </p><p>DeepSeek-V2.5 </p> </td> <td>Developed multi-head latent attention (MLA). Also used mixture of experts (MoE). <p>Implemented KV caching. </p> </td></tr> <tr> <td>DeepSeek V3 </td> <td>Dec 2024 </td> <td>DeepSeek-V3-Base <p>DeepSeek-V3 (a chat model) </p> </td> <td>The architecture is essentially the same as V2. </td></tr> <tr> <td rowspan="3">DeepSeek R1 </td> <td>20 Nov 2024 </td> <td>DeepSeek-R1-Lite-Preview </td> <td>Only accessed through API and a chat interface. </td></tr> <tr> <td rowspan="2">20 Jan 2025 </td> <td>DeepSeek-R1 <p>DeepSeek-R1-Zero </p> </td> <td>Initialized from DeepSeek-V3-Base and sharing the V3 architecture. </td></tr> <tr> <td>Distilled models </td> <td>Initialized from other models, such as Llama, Qwen, etc. Distilled from data synthesized by R1 and R1-Zero.<sup id="cite_ref-Ma,_Shirong-2025_51-0" class="reference"><a href="#cite_note-Ma,_Shirong-2025-51"><span class="cite-bracket">[</span>48<span class="cite-bracket">]</span></a></sup> </td></tr></tbody></table> <p>The first DeepSeek models were essentially the same as Llama,<sup id="cite_ref-Dong,_Kai-2024_36-1" class="reference"><a href="#cite_note-Dong,_Kai-2024-36"><span class="cite-bracket">[</span>34<span class="cite-bracket">]</span></a></sup> which were dense decoder-only <a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">transformers</a>. Later models incorporated the multi-head latent attention (MLA), Mixture of Experts (MoE), and KV caching.<sup id="cite_ref-Dai-2024_37-1" class="reference"><a href="#cite_note-Dai-2024-37"><span class="cite-bracket">[</span>35<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-V2_39-1" class="reference"><a href="#cite_note-V2-39"><span class="cite-bracket">[</span>37<span class="cite-bracket">]</span></a></sup><sup class="noprint Inline-Template" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Verifiability" title="Wikipedia:Verifiability"><span title="The material near this tag needs to be fact-checked with the cited source(s). (March 2025)">verification needed</span></a></i>]</sup> </p><p>A decoder-only transformer consists of multiple identical decoder layers. Each of these layers features two main components: an attention layer and a FeedForward network (FFN) layer.<sup id="cite_ref-V2_39-2" class="reference"><a href="#cite_note-V2-39"><span class="cite-bracket">[</span>37<span class="cite-bracket">]</span></a></sup> In the attention layer, the traditional multi-head attention mechanism has been enhanced with multi-head latent attention. 
Meanwhile, the FFN layer adopts a variant of the mixture of experts (MoE) approach, effectively doubling the number of experts compared to standard implementations. It distinguishes between two types of experts: shared experts, which are always active to encapsulate general knowledge, and routed experts, only a select few of which are activated to capture specialized information.[35][citation needed]
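As a rough illustration of that shared-plus-routed split, the sketch below always applies a few shared experts to every token and lets a router select a small number of routed experts per token. The expert counts, sizes, and top-k value are invented for the example and are not DeepSeek's published configuration.

```python
# Illustrative MoE feedforward layer with always-active "shared" experts plus a
# small number of "routed" experts selected per token. Sizes are examples only.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SharedRoutedMoE(nn.Module):
    def __init__(self, d_model=512, d_ff=1024, n_shared=2, n_routed=8, top_k=2):
        super().__init__()
        make_expert = lambda: nn.Sequential(
            nn.Linear(d_model, d_ff), nn.GELU(), nn.Linear(d_ff, d_model))
        self.shared = nn.ModuleList(make_expert() for _ in range(n_shared))
        self.routed = nn.ModuleList(make_expert() for _ in range(n_routed))
        self.router = nn.Linear(d_model, n_routed)   # scores each routed expert
        self.top_k = top_k

    def forward(self, x):
        # x: (tokens, d_model). Shared experts process every token.
        out = sum(expert(x) for expert in self.shared)
        # The router picks the top-k routed experts for each token.
        scores = F.softmax(self.router(x), dim=-1)
        topk_scores, topk_idx = scores.topk(self.top_k, dim=-1)
        for slot in range(self.top_k):
            for i, expert in enumerate(self.routed):
                mask = topk_idx[:, slot] == i         # tokens routed to expert i
                if mask.any():
                    out[mask] += topk_scores[mask, slot, None] * expert(x[mask])
        return out

moe = SharedRoutedMoE()
y = moe(torch.randn(16, 512))   # 16 tokens pass through shared + routed experts
```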
</p> <div class="mw-heading mw-heading2"><h2 id="Overview_of_models">Overview of models</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=8" title="Edit section: Overview of models"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1251242444">.mw-parser-output .ambox{border:1px solid #a2a9b1;border-left:10px solid #36c;background-color:#fbfbfb;box-sizing:border-box}.mw-parser-output .ambox+link+.ambox,.mw-parser-output .ambox+link+style+.ambox,.mw-parser-output .ambox+link+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+style+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+link+.ambox{margin-top:-1px}html body.mediawiki .mw-parser-output .ambox.mbox-small-left{margin:4px 1em 4px 0;overflow:hidden;width:238px;border-collapse:collapse;font-size:88%;line-height:1.25em}.mw-parser-output .ambox-speedy{border-left:10px solid #b32424;background-color:#fee7e6}.mw-parser-output .ambox-delete{border-left:10px solid #b32424}.mw-parser-output .ambox-content{border-left:10px solid #f28500}.mw-parser-output .ambox-style{border-left:10px solid #fc3}.mw-parser-output .ambox-move{border-left:10px solid #9932cc}.mw-parser-output .ambox-protection{border-left:10px solid #a2a9b1}.mw-parser-output .ambox .mbox-text{border:none;padding:0.25em 0.5em;width:100%}.mw-parser-output .ambox .mbox-image{border:none;padding:2px 0 2px 0.5em;text-align:center}.mw-parser-output .ambox .mbox-imageright{border:none;padding:2px 0.5em 2px 0;text-align:center}.mw-parser-output .ambox .mbox-empty-cell{border:none;padding:0;width:1px}.mw-parser-output .ambox .mbox-image-div{width:52px}@media(min-width:720px){.mw-parser-output .ambox{margin:0 10%}}@media print{body.ns-0 .mw-parser-output .ambox{display:none!important}}</style><table class="box-Technical plainlinks metadata ambox ambox-style ambox-technical" role="presentation"><tbody><tr><td class="mbox-image"><div class="mbox-image-div"><span typeof="mw:File"><span><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/40px-Edit-clear.svg.png" decoding="async" width="40" height="40" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/60px-Edit-clear.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/80px-Edit-clear.svg.png 2x" data-file-width="48" data-file-height="48" /></span></span></div></td><td class="mbox-text"><div class="mbox-text-span">This section <b>may be too technical for most readers to understand</b>.<span class="hide-when-compact"> Please <a class="external text" href="https://en.wikipedia.org/w/index.php?title=DeepSeek&action=edit">help improve it</a> to <a href="/wiki/Wikipedia:Make_technical_articles_understandable" title="Wikipedia:Make technical articles understandable">make it understandable to non-experts</a>, without removing the technical details.</span> <span class="date-container"><i>(<span class="date">January 2025</span>)</i></span><span class="hide-when-compact"><i> (<small><a href="/wiki/Help:Maintenance_template_removal" title="Help:Maintenance template removal">Learn how and when to remove this message</a></small>)</i></span></div></td></tr></tbody></table> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1251242444" /><table class="box-Primary_sources plainlinks metadata ambox ambox-content ambox-Primary_sources" 
role="presentation"><tbody><tr><td class="mbox-image"><div class="mbox-image-div"><span typeof="mw:File"><a href="/wiki/File:Question_book-new.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/50px-Question_book-new.svg.png" decoding="async" width="50" height="39" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/75px-Question_book-new.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/100px-Question_book-new.svg.png 2x" data-file-width="512" data-file-height="399" /></a></span></div></td><td class="mbox-text"><div class="mbox-text-span">This section <b>relies excessively on <a href="/wiki/Wikipedia:Verifiability" title="Wikipedia:Verifiability">references</a> to <a href="/wiki/Wikipedia:No_original_research#Primary,_secondary_and_tertiary_sources" title="Wikipedia:No original research">primary sources</a></b>.<span class="hide-when-compact"> Please improve this section by adding <a href="/wiki/Wikipedia:No_original_research#Primary,_secondary_and_tertiary_sources" title="Wikipedia:No original research">secondary or tertiary sources</a>. <br /><small><span class="plainlinks"><i>Find sources:</i> <a rel="nofollow" class="external text" href="https://www.google.com/search?as_eq=wikipedia&q=%22DeepSeek%22">"DeepSeek"</a> – <a rel="nofollow" class="external text" href="https://www.google.com/search?tbm=nws&q=%22DeepSeek%22+-wikipedia&tbs=ar:1">news</a> <b>·</b> <a rel="nofollow" class="external text" href="https://www.google.com/search?&q=%22DeepSeek%22&tbs=bkt:s&tbm=bks">newspapers</a> <b>·</b> <a rel="nofollow" class="external text" href="https://www.google.com/search?tbs=bks:1&q=%22DeepSeek%22+-wikipedia">books</a> <b>·</b> <a rel="nofollow" class="external text" href="https://scholar.google.com/scholar?q=%22DeepSeek%22">scholar</a> <b>·</b> <a rel="nofollow" class="external text" href="https://www.jstor.org/action/doBasicSearch?Query=%22DeepSeek%22&acc=on&wc=on">JSTOR</a></span></small></span> <span class="date-container"><i>(<span class="date">February 2025</span>)</i></span><span class="hide-when-compact"><i> (<small><a href="/wiki/Help:Maintenance_template_removal" title="Help:Maintenance template removal">Learn how and when to remove this message</a></small>)</i></span></div></td></tr></tbody></table> <p>DeepSeek's models are "open weight", which provides less freedom for modification than true <a href="/wiki/Open_source" title="Open source">open source</a> software.<sup id="cite_ref-Delbert_19-1" class="reference"><a href="#cite_note-Delbert-19"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-52" class="reference"><a href="#cite_note-52"><span class="cite-bracket">[</span>49<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="DeepSeek_Coder">DeepSeek Coder</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=9" title="Edit section: DeepSeek Coder"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>DeepSeek Coder is a series of eight models, four pretrained (<code>Base</code>) and four instruction-finetuned (<code>Instruct</code>). All have 16K context lengths. 
The models were made source-available under the DeepSeek License, which includes "open and responsible downstream usage" restrictions.[50]

The training program was:[51][52][53]

1. Pretraining: 1.8T tokens (87% source code, 10% code-related English (GitHub markdown and Stack Exchange), and 3% code-unrelated Chinese).
2. Long-context pretraining: 200B tokens. This extends the context length from 4K to 16K. This produced the Base models.
3. Supervised finetuning (SFT): 2B tokens of instruction data. This produced the Instruct models.

They were trained on clusters of A100 and H800 Nvidia GPUs, connected by InfiniBand, NVLink, and NVSwitch.[51]
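For the pretraining mixture listed above, the implied token budgets can be worked out directly; the snippet below is just that arithmetic, with approximate figures.

```python
# Illustrative arithmetic only: approximate token budgets implied by the
# pretraining mixture above (87% / 10% / 3% of 1.8T tokens).
TOTAL = 1.8e12
mixture = {"source code": 0.87, "code-related English": 0.10, "code-unrelated Chinese": 0.03}
for name, share in mixture.items():
    print(f"{name}: ~{share * TOTAL / 1e12:.2f}T tokens")
# source code: ~1.57T tokens
# code-related English: ~0.18T tokens
# code-unrelated Chinese: ~0.05T tokens
```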
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d_{\text{model}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>model</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d_{\text{model}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/aefdfb00976a3a5c5ec3c8fcbcc166e82ceb6268" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:5.733ex; height:2.509ex;" alt="{\displaystyle d_{\text{model}}}" /></span> </th> <th><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d_{\text{intermediate}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>intermediate</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d_{\text{intermediate}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/223a7fb6567eb209f3cf71cdffecfc73e137c89a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:10.489ex; height:2.509ex;" alt="{\displaystyle d_{\text{intermediate}}}" /></span></th> <th><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n_{\text{heads}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>heads</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n_{\text{heads}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c54b61bfa2fdac01cada7ff302b934b5831f7293" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:5.655ex; height:2.009ex;" alt="{\displaystyle n_{\text{heads}}}" /></span></th> <th><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n_{\text{kv-heads}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>kv-heads</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n_{\text{kv-heads}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/39526557c4f3411574cfb40d2dbf921c32e1b082" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:7.938ex; height:2.009ex;" alt="{\displaystyle n_{\text{kv-heads}}}" /></span> </th></tr> <tr> <td>1.3B</td> <td>24</td> <td>2048 </td> <td>5504</td> <td>16</td> <td>16 </td></tr> <tr> <td>5.7B </td> <td>32 </td> <td>4096 </td> <td>11008 </td> <td>32 </td> <td>1<sup id="cite_ref-fn1_58-0" class="reference"><a href="#cite_note-fn1-58"><span class="cite-bracket">[</span>note 2<span class="cite-bracket">]</span></a></sup> 
</td></tr> <tr> <td>6.7B</td> <td>32</td> <td>4096 </td> <td>11008</td> <td>32</td> <td>32 </td></tr> <tr> <td>33B</td> <td>62</td> <td>7168 </td> <td>19200</td> <td>56</td> <td>7<sup id="cite_ref-fn1_58-1" class="reference"><a href="#cite_note-fn1-58"><span class="cite-bracket">[</span>note 2<span class="cite-bracket">]</span></a></sup> </td></tr></tbody></table> <div class="mw-heading mw-heading3"><h3 id="DeepSeek-LLM">DeepSeek-LLM</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=10" title="Edit section: DeepSeek-LLM"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The DeepSeek-LLM series was released in November 2023. It has 7B and 67B parameters in both Base and Chat forms. DeepSeek's accompanying paper claimed benchmark results higher than <a href="/wiki/Llama_2" class="mw-redirect" title="Llama 2">Llama 2</a> and most open-source LLMs at the time.<sup id="cite_ref-Dong,_Kai-2024_36-2" class="reference"><a href="#cite_note-Dong,_Kai-2024-36"><span class="cite-bracket">[</span>34<span class="cite-bracket">]</span></a></sup><sup class="reference nowrap"><span title="Location: section 5">: section 5 </span></sup> The model code is under the source-available DeepSeek License.<sup id="cite_ref-59" class="reference"><a href="#cite_note-59"><span class="cite-bracket">[</span>55<span class="cite-bracket">]</span></a></sup> </p><p>The architecture was essentially the same as the <a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a> series. They used the <a href="/wiki/Transformer_(deep_learning_architecture)#pre-LN" title="Transformer (deep learning architecture)">pre-norm</a> <a href="/wiki/Transformer_(deep_learning_architecture)#decoder-only" title="Transformer (deep learning architecture)">decoder-only Transformer</a> with <a href="/wiki/RMSNorm" class="mw-redirect" title="RMSNorm">RMSNorm</a> as the normalization, <a href="/wiki/SwiGLU" class="mw-redirect" title="SwiGLU">SwiGLU</a> in the feedforward layers, <a href="/wiki/Rotary_positional_embedding" class="mw-redirect" title="Rotary positional embedding">rotary positional embedding</a> (RoPE), and <a href="/wiki/Grouped-query_attention" class="mw-redirect" title="Grouped-query attention">grouped-query attention</a> (GQA). Both had vocabulary size 102,400 (<a href="/wiki/Byte_pair_encoding#Byte-level_BPE" title="Byte pair encoding">byte-level BPE</a>) and context length of 4096. 
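The pieces named above combine as follows. The block below is a minimal, illustrative pre-norm decoder layer in that style (PyTorch): RMSNorm before each sub-layer, a SwiGLU feedforward, and grouped-query attention with fewer key/value heads than query heads (as in the 67B configuration). RoPE is noted but omitted for brevity, and the dimensions and structure are simplified assumptions, not DeepSeek's actual code.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class RMSNorm(nn.Module):
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))
        self.eps = eps
    def forward(self, x):
        # normalize by root-mean-square only (no mean subtraction, unlike LayerNorm)
        return x * x.pow(2).mean(-1, keepdim=True).add(self.eps).rsqrt() * self.weight

class SwiGLU(nn.Module):
    def __init__(self, dim, hidden):
        super().__init__()
        self.gate = nn.Linear(dim, hidden, bias=False)
        self.up = nn.Linear(dim, hidden, bias=False)
        self.down = nn.Linear(hidden, dim, bias=False)
    def forward(self, x):
        return self.down(F.silu(self.gate(x)) * self.up(x))

class DecoderBlock(nn.Module):
    def __init__(self, d_model=4096, n_heads=32, n_kv_heads=8, d_ff=11008):
        super().__init__()
        self.n_heads, self.n_kv_heads = n_heads, n_kv_heads
        self.head_dim = d_model // n_heads
        self.q = nn.Linear(d_model, n_heads * self.head_dim, bias=False)
        # grouped-query attention: fewer key/value heads than query heads
        self.k = nn.Linear(d_model, n_kv_heads * self.head_dim, bias=False)
        self.v = nn.Linear(d_model, n_kv_heads * self.head_dim, bias=False)
        self.o = nn.Linear(d_model, d_model, bias=False)
        self.attn_norm, self.ffn_norm = RMSNorm(d_model), RMSNorm(d_model)
        self.ffn = SwiGLU(d_model, d_ff)

    def forward(self, x):
        b, t, _ = x.shape
        h = self.attn_norm(x)                                   # pre-norm
        q = self.q(h).view(b, t, self.n_heads, self.head_dim).transpose(1, 2)
        k = self.k(h).view(b, t, self.n_kv_heads, self.head_dim).transpose(1, 2)
        v = self.v(h).view(b, t, self.n_kv_heads, self.head_dim).transpose(1, 2)
        # (RoPE would be applied to q and k here.)
        rep = self.n_heads // self.n_kv_heads                    # share each KV head
        k, v = k.repeat_interleave(rep, dim=1), v.repeat_interleave(rep, dim=1)
        attn = F.scaled_dot_product_attention(q, k, v, is_causal=True)
        x = x + self.o(attn.transpose(1, 2).reshape(b, t, -1))
        return x + self.ffn(self.ffn_norm(x))
```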
They trained on 2 trillion tokens of English and Chinese text obtained by deduplicating the Common Crawl.[34]

DeepSeek LLM properties[34]: Table 2

| Params. | n_layers | d_model | d_intermediate | n_heads | n_kv-heads |
|---|---|---|---|---|---|
| 7B | 30 | 4096 | 11008 | 32 | 32 |
| 67B | 95 | 8192 | 22016 | 64 | 8[note 2] |

The Chat versions of the two Base models were released concurrently, obtained by training Base with supervised finetuning (SFT) followed by direct preference optimization (DPO).[34]

#### MoE

The DeepSeek-MoE models (Base and Chat) each have 16B parameters (2.7B activated per token, 4K context length). The training was essentially the same as for DeepSeek-LLM 7B, and used a part of its training dataset. They claimed that the 16B MoE model performed comparably to a dense (non-MoE) 7B model. It is a variant of the standard sparsely-gated MoE, with "shared experts" that are always queried and "routed experts" that might not be. They found this to help with expert balancing: in a standard MoE, some experts can become overused while others are rarely used, wasting capacity, and attempts to balance usage can push different experts toward learning the same redundant capabilities. They proposed the shared experts to learn core capabilities that are often used, and let the routed experts learn peripheral capabilities that are rarely used.[35]
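A minimal sketch of the shared-plus-routed layout described above (PyTorch), purely illustrative: the expert sizes, top-k value, and softmax gating shown here are assumptions for the sketch, and the auxiliary load-balancing losses of the real design are omitted.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SharedRoutedMoE(nn.Module):
    """Toy MoE layer: shared experts always run, routed experts are chosen per token."""
    def __init__(self, d_model=1024, d_expert=256, n_shared=2, n_routed=8, top_k=2):
        super().__init__()
        make = lambda: nn.Sequential(nn.Linear(d_model, d_expert), nn.SiLU(),
                                     nn.Linear(d_expert, d_model))
        self.shared = nn.ModuleList(make() for _ in range(n_shared))
        self.routed = nn.ModuleList(make() for _ in range(n_routed))
        self.gate = nn.Linear(d_model, n_routed, bias=False)
        self.top_k = top_k

    def forward(self, x):                              # x: (tokens, d_model)
        out = sum(e(x) for e in self.shared)           # shared experts see every token
        scores = F.softmax(self.gate(x), dim=-1)       # (tokens, n_routed)
        weights, idx = scores.topk(self.top_k, dim=-1)
        # dense reference implementation: run every routed expert, then keep only the
        # top-k per token (real systems dispatch tokens to experts sparsely instead)
        all_routed = torch.stack([e(x) for e in self.routed], dim=1)
        mask = torch.zeros_like(scores).scatter(1, idx, weights)
        return out + (mask.unsqueeze(-1) * all_routed).sum(dim=1)
```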
#### Math

DeepSeek-Math includes 3 models: Base, Instruct, and RL. Math was trained as follows:[36]

1. Initialize with a previously pretrained DeepSeek-Coder-Base-v1.5 7B.
2. Further pretrain with 500B tokens (56% DeepSeekMath Corpus, 4% AlgebraicStack, 10% arXiv, 20% GitHub code, 10% Common Crawl). This produced Base.
3. Train an instruction-following model by SFT of Base on 776K math problems with tool-use-integrated step-by-step solutions. This produced Instruct.
4. Reinforcement learning (RL): The reward model was a process reward model (PRM) trained from Base according to the Math-Shepherd method.[56] This reward model was then used to train Instruct using Group Relative Policy Optimization (GRPO) on a dataset of 144K math questions "related to GSM8K and MATH" (a sketch of the group-relative advantage computation follows this list). The reward model was continuously updated during training to avoid reward hacking. This resulted in RL.
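As a rough illustration of the group-relative idea named above: GRPO scores a group of sampled answers to the same question and uses each sample's reward relative to its group mean as the advantage, avoiding a separate value network. The snippet below sketches only that advantage computation (the clipped policy-gradient loss and KL penalty of the full algorithm are omitted), and the reward values are invented for illustration.

```python
import torch

def group_relative_advantages(rewards: torch.Tensor, eps: float = 1e-4) -> torch.Tensor:
    """Normalize each sampled completion's reward against its own group.

    rewards: (n_questions, group_size) rewards for several completions per question.
    Returns advantages of the same shape: (reward - group mean) / group std.
    """
    mean = rewards.mean(dim=1, keepdim=True)
    std = rewards.std(dim=1, keepdim=True)
    return (rewards - mean) / (std + eps)

# Toy example: 2 questions, 4 sampled answers each; rewards are made up.
rewards = torch.tensor([[1.0, 0.0, 0.0, 1.0],
                        [0.0, 0.0, 0.0, 1.0]])
print(group_relative_advantages(rewards))
```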
### V2

*Figure: The architecture of V2, showing both shared-routed MoE and MLA.[57]: Figure 2*

In May 2024, DeepSeek released the DeepSeek-V2 series. The series includes 4 models, 2 base models (DeepSeek-V2, DeepSeek-V2 Lite) and 2 chat models (Chat). The two larger models were trained as follows:[57]

1. Pretrain on a dataset of 8.1T tokens, using 12% more Chinese tokens than English ones.
2. Extend the context length from 4K to 128K using YaRN.[58] This resulted in DeepSeek-V2.
3. SFT with 1.2M instances for helpfulness and 0.3M for safety. This resulted in Chat (SFT), which was not released.
4. RL using GRPO in two stages. The first stage was trained to solve math and coding problems. This stage used 1 reward model, trained on compiler feedback (for coding) and ground-truth labels (for math). The second stage was trained to be helpful, safe, and follow rules. This stage used 3 reward models. The helpfulness and safety reward models were trained on human preference data. The rule-based reward model was manually programmed. All trained reward models were initialized from Chat (SFT). This resulted in the released version of Chat.

They opted for 2-staged RL, because they found that RL on reasoning data had "unique characteristics" different from RL on general data. For example, RL on reasoning could improve over more training steps.[57]

The two V2-Lite models were smaller, and trained similarly. DeepSeek-V2 Lite-Chat underwent only SFT, not RL. They trained the Lite version to help "further research and development on MLA and DeepSeekMoE".[57]

Architecturally, the V2 models were significantly different from the DeepSeek LLM series. They replaced the standard attention mechanism with a low-rank approximation called multi-head latent attention (MLA), and used the previously published mixture of experts (MoE) variant.[35]
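A minimal sketch of the low-rank idea behind MLA (PyTorch): instead of caching full per-head keys and values, the layer caches one small latent vector per token and up-projects keys and values from it at attention time. The dimensions are arbitrary and the decoupled rotary-embedding path of the real design is omitted, so this illustrates the compression trick rather than DeepSeek's implementation.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ToyMLA(nn.Module):
    def __init__(self, d_model=1024, n_heads=8, d_latent=64):
        super().__init__()
        self.n_heads, self.head_dim = n_heads, d_model // n_heads
        self.q_proj = nn.Linear(d_model, d_model, bias=False)
        # down-project each token into a small latent; only this is cached
        self.kv_down = nn.Linear(d_model, d_latent, bias=False)
        # up-project the cached latent back into per-head keys and values
        self.k_up = nn.Linear(d_latent, d_model, bias=False)
        self.v_up = nn.Linear(d_latent, d_model, bias=False)
        self.out = nn.Linear(d_model, d_model, bias=False)

    def forward(self, x, latent_cache=None):
        b, t, _ = x.shape
        latent = self.kv_down(x)                        # (b, t, d_latent)
        if latent_cache is not None:                    # append to the running cache
            latent = torch.cat([latent_cache, latent], dim=1)
        split = lambda z: z.view(b, -1, self.n_heads, self.head_dim).transpose(1, 2)
        q = split(self.q_proj(x))
        k, v = split(self.k_up(latent)), split(self.v_up(latent))
        y = F.scaled_dot_product_attention(q, k, v, is_causal=latent_cache is None)
        y = y.transpose(1, 2).reshape(b, t, -1)
        # the cache grows by only d_latent numbers per token, not 2 * d_model
        return self.out(y), latent
```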
class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n_{\text{routed experts}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>routed experts</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n_{\text{routed experts}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/67b73b5d26a8e0bcd1def06de81ef51bf4c50283" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:12.046ex; height:2.343ex;" alt="{\displaystyle n_{\text{routed experts}}}" /></span> </th></tr> <tr> <td>V2-Lite </td> <td>15.7B </td> <td>2.4B</td> <td>27</td> <td>32K </td> <td>2</td> <td>64 </td></tr> <tr> <td>V2 </td> <td>236B </td> <td>21B</td> <td>60</td> <td>128K </td> <td>2</td> <td>160 </td></tr></tbody></table> <p>The <i><a href="/wiki/Financial_Times" title="Financial Times">Financial Times</a></i> reported that it was cheaper than its peers with a price of 2 <a href="/wiki/Renminbi" title="Renminbi">RMB</a> for every million output tokens. The <a href="/wiki/University_of_Waterloo" title="University of Waterloo">University of Waterloo</a> Tiger Lab's leaderboard ranked DeepSeek-V2 seventh on its LLM ranking.<sup id="cite_ref-McMorrow-2024_35-1" class="reference"><a href="#cite_note-McMorrow-2024-35"><span class="cite-bracket">[</span>33<span class="cite-bracket">]</span></a></sup> </p><p>The DeepSeek-Coder V2 series included V2-Base, V2-Lite-Base, V2-Instruct, and V20-Lite-Instruct.. Training:<sup id="cite_ref-V2_39-5" class="reference"><a href="#cite_note-V2-39"><span class="cite-bracket">[</span>37<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-65" class="reference"><a href="#cite_note-65"><span class="cite-bracket">[</span>note 3<span class="cite-bracket">]</span></a></sup> </p> <ol><li>Base models were initialized from corresponding intermediate checkpoints after pretraining on 4.2T tokens (not the version at the end of pretraining), then pretrained further for 6T tokens, then context-extended to 128K context length.</li> <li>DeepSeek-Coder and DeepSeek-Math were used to generate 20K code-related and 30K math-related instruction data, then combined with an instruction dataset of 300M tokens. This was used for SFT.</li> <li>RL with GRPO. The reward for math problems was computed by comparing with the ground-truth label. 
DeepSeek-V2.5 was made by combining DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct.[38]

### V3

*Figure: Multi-Token Prediction*

DeepSeek-V3-Base and DeepSeek-V3 (a chat model) use essentially the same architecture as V2, with the addition of multi-token prediction, which (optionally) decodes extra tokens faster but less accurately (a toy sketch of the idea follows the training steps below). Training:[28]

1. Pretraining on 14.8T tokens of a multilingual corpus, mostly English and Chinese. It contained a higher ratio of math and programming than the pretraining dataset of V2.
2. Extend the context length twice, from 4K to 32K and then to 128K, using YaRN.[58] This produced DeepSeek-V3-Base.
3. SFT for 2 epochs on 1.5M samples of reasoning (math, programming, logic) and non-reasoning (creative writing, roleplay, simple question answering) data. Reasoning data was generated by "expert models". Non-reasoning data was generated by DeepSeek-V2.5 and checked by humans.
   - The "expert models" were trained by starting with an unspecified base model, then SFT on both <problem, original response> data and synthetic <system prompt, prompt, problem, R1 response> data generated by an internal DeepSeek-R1-Lite model. The system prompt asked R1 to reflect and verify during thinking. The expert models were then trained with RL using an undisclosed reward function.
   - Each expert model was trained to generate just synthetic reasoning data in one specific domain (math, programming, logic).
   - Expert models were used instead of R1 itself, since the output from R1 itself suffered from "overthinking, poor formatting, and excessive length".
4. Model-based reward models were made by starting with an SFT checkpoint of V3, then finetuning on human preference data containing both the final reward and the chain-of-thought leading to the final reward. The reward model produced reward signals both for questions with objective but free-form answers, and for questions without objective answers (such as creative writing).
5. An SFT checkpoint of V3 was trained by GRPO using both reward models and rule-based reward. The rule-based reward was computed for math problems with a final answer (put in a box), and for programming problems by unit tests. This produced DeepSeek-V3.
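A toy illustration of multi-token prediction as an auxiliary training objective (PyTorch): alongside the usual next-token head, a small extra head is trained to predict the token two positions ahead, and its predictions can later be used to propose extra tokens per decoding step. The single linear extra head, the loss weighting, and the dimensions are assumptions made for this sketch, not the V3 design, which uses a deeper prediction module.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ToyMTPHeads(nn.Module):
    """Next-token head plus one extra head predicting the token after next."""
    def __init__(self, d_model=512, vocab=32000):
        super().__init__()
        self.next_head = nn.Linear(d_model, vocab)
        self.next_next_head = nn.Linear(d_model, vocab)

    def loss(self, hidden, tokens, mtp_weight=0.3):
        # hidden: (batch, seq, d_model) from a backbone; tokens: (batch, seq)
        logits1 = self.next_head(hidden[:, :-1])        # predict t+1 from position t
        loss1 = F.cross_entropy(logits1.flatten(0, 1), tokens[:, 1:].flatten())
        logits2 = self.next_next_head(hidden[:, :-2])   # predict t+2 from position t
        loss2 = F.cross_entropy(logits2.flatten(0, 1), tokens[:, 2:].flatten())
        return loss1 + mtp_weight * loss2               # auxiliary multi-token term

# Dummy usage with random data standing in for a real backbone's hidden states.
heads = ToyMTPHeads()
hidden = torch.randn(2, 16, 512)
tokens = torch.randint(0, 32000, (2, 16))
print(heads.loss(hidden, tokens))
```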
experts}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mtext>routed experts</mtext> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n_{\text{routed experts}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/67b73b5d26a8e0bcd1def06de81ef51bf4c50283" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:12.046ex; height:2.343ex;" alt="{\displaystyle n_{\text{routed experts}}}" /></span> </th></tr> <tr> <td>V3 </td> <td>671B </td> <td>37B</td> <td>61</td> <td>128K </td> <td>1</td> <td>256 </td></tr></tbody></table> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Mixed-precision_training_in_DeepSeek_V3.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/95/Mixed-precision_training_in_DeepSeek_V3.svg/330px-Mixed-precision_training_in_DeepSeek_V3.svg.png" decoding="async" width="310" height="89" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/95/Mixed-precision_training_in_DeepSeek_V3.svg/500px-Mixed-precision_training_in_DeepSeek_V3.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/95/Mixed-precision_training_in_DeepSeek_V3.svg/620px-Mixed-precision_training_in_DeepSeek_V3.svg.png 2x" data-file-width="679" data-file-height="196" /></a><figcaption>Mixed-precision framework for <code>V3</code><sup id="cite_ref-Deng,_Chengqi-2024_30-5" class="reference"><a href="#cite_note-Deng,_Chengqi-2024-30"><span class="cite-bracket">[</span>28<span class="cite-bracket">]</span></a></sup><sup class="reference nowrap"><span title="Location: Figure 6">: Figure 6 </span></sup></figcaption></figure> <p>The DeepSeek team performed extensive low-level engineering to improve efficiency. They used <a href="/wiki/Mixed-precision_arithmetic" title="Mixed-precision arithmetic">mixed-precision arithmetic</a>. Much of the forward pass was performed in <a href="/wiki/Floating-point_arithmetic" title="Floating-point arithmetic">8-bit floating point numbers</a> (5E2M: 5-bit exponent and 2-bit <a href="/wiki/Mantissa_(floating_point_number)" class="mw-redirect" title="Mantissa (floating point number)">mantissa</a>) rather than the standard <a href="/wiki/Single-precision_floating-point_format" title="Single-precision floating-point format">32-bit</a>, requiring special <a href="/wiki/General_matrix_multiply" class="mw-redirect" title="General matrix multiply">GEMM</a> routines to accumulate accurately. They used a custom 12-bit float (E5M6) only for the inputs to the linear layers after the attention modules. Optimizer states were in 16-bit (<a href="/wiki/Bfloat16_floating-point_format" title="Bfloat16 floating-point format">BF16</a>). They minimized communication latency by extensively overlapping computation and communication, such as dedicating 20 streaming multiprocessors out of 132 per H800 for only inter-GPU communication. 
After training, it was deployed on clusters of H800 GPUs. The 8 H800 GPUs within a cluster were connected by NVLink, and the clusters were connected by InfiniBand.[28]

Total cost of training the DeepSeek-V3 model[28]: Table 1

| Stage | Cost (thousands of GPU hours) | Cost (millions of US$) |
|---|---|---|
| Pre-training | 2,664 | 5.328 |
| Context extension | 119 | 0.24 |
| Fine-tuning | 5 | 0.01 |
| Total | 2,788 | 5.576 |

The cost has been discussed[62][63][64] and called misleading, because it covers only part of the true cost.[65]
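The two columns of the table are consistent with a flat rental rate of roughly US$2 per H800 GPU-hour, the accounting assumption stated in DeepSeek's own report; the snippet below just performs that conversion (minor differences come from rounding in the table).

```python
# Reproduce the USD column from the GPU-hour column at an assumed $2 per GPU-hour.
GPU_HOUR_USD = 2.0

stages = {"Pre-training": 2_664_000, "Context extension": 119_000, "Fine-tuning": 5_000}
for stage, gpu_hours in stages.items():
    print(f"{stage:18s} {gpu_hours * GPU_HOUR_USD / 1e6:.3f} million USD")
print(f"{'Total':18s} {sum(stages.values()) * GPU_HOUR_USD / 1e6:.3f} million USD")
```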
rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951" /><div role="note" class="hatnote navigation-not-searchable">See also: <a href="/wiki/Reflection_(artificial_intelligence)" title="Reflection (artificial intelligence)">Reflection (artificial intelligence)</a></div> <p>In January 2025, DeepSeek released the DeepSeek-R1 model under the <a href="/wiki/MIT_License" title="MIT License">MIT License</a>.<sup id="cite_ref-74" class="reference"><a href="#cite_note-74"><span class="cite-bracket">[</span>69<span class="cite-bracket">]</span></a></sup> </p><p>DeepSeek-R1-Lite-Preview<sup id="cite_ref-DSLI_1_41-1" class="reference"><a href="#cite_note-DSLI_1-41"><span class="cite-bracket">[</span>39<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-RP_42-1" class="reference"><a href="#cite_note-RP-42"><span class="cite-bracket">[</span>40<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-75" class="reference"><a href="#cite_note-75"><span class="cite-bracket">[</span>note 4<span class="cite-bracket">]</span></a></sup> was trained for logical inference, mathematical reasoning, and real-time problem-solving. DeepSeek claimed that it exceeded performance of <a href="/wiki/OpenAI_o1" title="OpenAI o1">OpenAI o1</a> on benchmarks such as <a href="/wiki/American_Invitational_Mathematics_Examination" title="American Invitational Mathematics Examination">American Invitational Mathematics Examination</a> (AIME) and MATH.<sup id="cite_ref-76" class="reference"><a href="#cite_note-76"><span class="cite-bracket">[</span>70<span class="cite-bracket">]</span></a></sup> However, <i><a href="/wiki/The_Wall_Street_Journal" title="The Wall Street Journal">The Wall Street Journal</a></i> reported that on 15 problems from the 2024 edition of AIME, the o1 model reached a solution faster.<sup id="cite_ref-77" class="reference"><a href="#cite_note-77"><span class="cite-bracket">[</span>71<span class="cite-bracket">]</span></a></sup> </p><p>DeepSeek-R1 and DeepSeek-R1-Zero<sup id="cite_ref-78" class="reference"><a href="#cite_note-78"><span class="cite-bracket">[</span>72<span class="cite-bracket">]</span></a></sup> were initialized from DeepSeek-V3-Base and share its architecture. 
DeepSeek-R1-Distill models were instead initialized from other pretrained open-weight models, including <a href="/wiki/Llama_(language_model)" title="Llama (language model)">LLaMA</a> and <a href="/wiki/Qwen" title="Qwen">Qwen</a>, then fine-tuned on <a href="/wiki/Synthetic_data" title="Synthetic data">synthetic data</a> generated by R1.<sup id="cite_ref-Ma,_Shirong-2025_51-1" class="reference"><a href="#cite_note-Ma,_Shirong-2025-51"><span class="cite-bracket">[</span>48<span class="cite-bracket">]</span></a></sup> </p> <style data-mw-deduplicate="TemplateStyles:r1224211176">.mw-parser-output .quotebox{background-color:#F9F9F9;border:1px solid #aaa;box-sizing:border-box;padding:10px;font-size:88%;max-width:100%}.mw-parser-output .quotebox.floatleft{margin:.5em 1.4em .8em 0}.mw-parser-output .quotebox.floatright{margin:.5em 0 .8em 1.4em}.mw-parser-output .quotebox.centered{overflow:hidden;position:relative;margin:.5em auto .8em auto}.mw-parser-output .quotebox.floatleft span,.mw-parser-output .quotebox.floatright span{font-style:inherit}.mw-parser-output .quotebox>blockquote{margin:0;padding:0;border-left:0;font-family:inherit;font-size:inherit}.mw-parser-output .quotebox-title{text-align:center;font-size:110%;font-weight:bold}.mw-parser-output .quotebox-quote>:first-child{margin-top:0}.mw-parser-output .quotebox-quote:last-child>:last-child{margin-bottom:0}.mw-parser-output .quotebox-quote.quoted:before{font-family:"Times New Roman",serif;font-weight:bold;font-size:large;color:gray;content:" “ ";vertical-align:-45%;line-height:0}.mw-parser-output .quotebox-quote.quoted:after{font-family:"Times New Roman",serif;font-weight:bold;font-size:large;color:gray;content:" ” ";line-height:0}.mw-parser-output .quotebox .left-aligned{text-align:left}.mw-parser-output .quotebox .right-aligned{text-align:right}.mw-parser-output .quotebox .center-aligned{text-align:center}.mw-parser-output .quotebox .quote-title,.mw-parser-output .quotebox .quotebox-quote{display:block}.mw-parser-output .quotebox cite{display:block;font-style:normal}@media screen and (max-width:640px){.mw-parser-output .quotebox{width:100%!important;margin:0 0 .8em!important;float:none!important}}</style><div class="quotebox pullquote floatright" style="width:350px; ;"> <div class="quotebox-title" style="">Template for <code>DeepSeek-R1-Zero</code></div> <blockquote class="quotebox-quote left-aligned" style=""> <p>A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. User: <prompt>. Assistant: </p> </blockquote> <div style="padding-bottom: 0; padding-top: 0.5em"><cite class="left-aligned" style="">– <prompt> is replaced with the specific reasoning question during training.</cite></div> </div> <p>DeepSeek-R1-Zero was trained exclusively using GRPO RL without SFT. Unlike previous versions, it used no model-based reward. All reward functions were rule-based, "mainly" of two types (other types were not specified): accuracy rewards and format rewards. Accuracy reward was checking whether a boxed answer is correct (for math) or whether a code passes tests (for programming). 
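A minimal sketch of what such rule-based rewards can look like; the regexes, the exact tag grammar, and the string-match answer check are assumptions made for illustration, not DeepSeek's published reward code.

```python
import re

def format_reward(completion: str) -> float:
    """1.0 if the completion wraps its reasoning and answer in the expected tags."""
    pattern = r"<think>.*?</think>\s*<answer>.*?</answer>\s*"
    return 1.0 if re.fullmatch(pattern, completion, flags=re.DOTALL) else 0.0

def accuracy_reward(completion: str, ground_truth: str) -> float:
    """1.0 if the boxed final answer matches the reference answer string."""
    match = re.search(r"\\boxed\{([^}]*)\}", completion)
    return 1.0 if match and match.group(1).strip() == ground_truth.strip() else 0.0

completion = (
    "<think>2 + 2 = 4, so the answer is 4.</think>"
    "<answer>The answer is \\boxed{4}.</answer>"
)
print(format_reward(completion), accuracy_reward(completion, "4"))   # 1.0 1.0
```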
R1-Zero has issues with readability and mixing languages. R1 was trained to address these issues and further improve reasoning:[48]

1. SFT DeepSeek-V3-Base on "thousands" of "cold-start" data, all in the standard format of `|special_token|<reasoning_process>|special_token|<summary>`, designed to improve model output readability.
2. Apply the same GRPO RL process as for R1-Zero, adding a "language consistency reward" to encourage the model to respond monolingually. This produced an unreleased internal model.
3. Synthesize 600K reasoning data items from the internal model, with rejection sampling (i.e. if the generated reasoning had a wrong final answer, it is removed; see the sketch after this list). Synthesize 200K non-reasoning data items (writing, factual QA, self-cognition, translation) using DeepSeek-V3.
4. SFT DeepSeek-V3-Base on the 800K synthetic data items for 2 epochs.
5. Apply the same GRPO RL process as for R1-Zero, with rule-based reward (for reasoning tasks), but also model-based reward (for non-reasoning tasks, helpfulness, and harmlessness). This produced DeepSeek-R1.

Distilled models were trained by SFT on 800K data items synthesized from DeepSeek-R1, in a similar way as in step 3. They were not trained with RL.[48]
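The rejection-sampling step in item 3 amounts to keeping only sampled traces whose final answer checks out. Below is a simplified sketch that reuses the illustrative `accuracy_reward` check from the R1-Zero snippet above; `generate` is an invented placeholder standing in for sampling from the internal model.

```python
def rejection_sample(question: str, ground_truth: str, generate, n_samples: int = 8):
    """Keep only generated reasoning traces whose boxed final answer is correct.

    `generate(question)` is a placeholder for sampling one completion from the
    internal reasoning model; `accuracy_reward` is the answer check sketched earlier.
    """
    kept = []
    for _ in range(n_samples):
        completion = generate(question)
        if accuracy_reward(completion, ground_truth) == 1.0:
            kept.append({"question": question, "completion": completion})
    return kept   # accepted traces become SFT data; wrong-answer traces are discarded
```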
R2, the successor to R1, was originally planned for release in early May 2025, but the release schedule was accelerated.[73]

## Significance

DeepSeek's success against larger and more established rivals has been described as "upending AI".[15][74]

The DeepSeek-R1 model provides responses comparable to other contemporary large language models, such as OpenAI's GPT-4o and o1.[75] Its training cost is reported to be significantly lower than that of other LLMs.

The company claims that it trained V3, a predecessor of R1, for US$6 million, compared to $100 million for OpenAI's GPT-4 in 2023,[11] and with approximately one tenth of the computing power used for Meta's comparable model, LLaMA 3.1.[11][12][13][14]

After the January 2025 release of the R1 model, which offered significantly lower costs than competing models, some investors anticipated a price war in the American AI industry.[76] It was dubbed the "Pinduoduo of AI", and other Chinese tech giants such as ByteDance, Tencent, Baidu, and Alibaba cut the prices of their AI models. Despite its low price, it was profitable compared to its money-losing rivals.[44]

## See also
srcset="//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Flag_of_the_People%27s_Republic_of_China.svg/48px-Flag_of_the_People%27s_Republic_of_China.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Flag_of_the_People%27s_Republic_of_China.svg/64px-Flag_of_the_People%27s_Republic_of_China.svg.png 2x" data-file-width="900" data-file-height="600" /></span></span></span><span class="portalbox-link"><a href="/wiki/Portal:China" title="Portal:China">China portal</a></span></li></ul> <ul><li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek (chatbot)</a> – Chatbot developed by DeepSeek</li> <li><a href="/wiki/Artificial_intelligence_industry_in_China" title="Artificial intelligence industry in China">Artificial intelligence industry in China</a></li> <li><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a> – Artificial intelligence research organization</li> <li><a href="/wiki/Jevons_paradox" title="Jevons paradox">Jevons paradox</a> – Efficiency leads to increased demand</li></ul> <div class="mw-heading mw-heading2"><h2 id="Notes">Notes</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=18" title="Edit section: Notes"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist reflist-lower-alpha"> <div class="mw-references-wrap"><ol class="references"> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><a href="/wiki/Chinese_language" title="Chinese language">Chinese</a>: <span lang="zh">杭州深度求索人工智能基础技术研究有限公司</span>.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> Sometimes simply referred to in English as <b>Hangzhou DeepSeek Artificial Intelligence</b>.</span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><a href="/wiki/Simplified_Chinese_characters" title="Simplified Chinese characters">Chinese</a>: <span lang="zh-Hans">深度求索</span>; <a href="/wiki/Pinyin" title="Pinyin">pinyin</a>: <i><span lang="zh-Latn">Shēndù Qiúsuǒ</span></i></span> </li> </ol></div></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239543626" /><div class="reflist"> <div class="mw-references-wrap"><ol class="references"> <li id="cite_note-45"><span class="mw-cite-backlink"><b><a href="#cite_ref-45">^</a></b></span> <span class="reference-text">宁波程信柔兆企业管理咨询合伙企业(有限合伙) and 
宁波程恩企业管理咨询合伙企业(有限合伙)</span> </li> <li id="cite_note-fn1-58"><span class="mw-cite-backlink">^ <a href="#cite_ref-fn1_58-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-fn1_58-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-fn1_58-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text">The number of heads does not equal the number of KV heads, due to GQA.</span> </li> <li id="cite_note-65"><span class="mw-cite-backlink"><b><a href="#cite_ref-65">^</a></b></span> <span class="reference-text">Inexplicably, the model named <code>DeepSeek-Coder-V2 Chat</code> in the paper was released as <code>DeepSeek-Coder-V2-Instruct</code> in HuggingFace.</span> </li> <li id="cite_note-75"><span class="mw-cite-backlink"><b><a href="#cite_ref-75">^</a></b></span> <span class="reference-text">At that time, the <code>R1-Lite-Preview</code> required selecting "Deep Think enabled", and every user could use it only 50 times a day.</span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=19" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239543626" /><div class="reflist reflist-columns references-column-width" style="column-width: 30em;"> <ol class="references"> <li id="cite_note-DeepSeek突传消息!-1"><span class="mw-cite-backlink">^ <a href="#cite_ref-DeepSeek突传消息!_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-DeepSeek突传消息!_1-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output 
References

1. "DeepSeek突传消息" [Sudden news from DeepSeek]. Sina Corp. 1 February 2025. Retrieved 1 February 2025. https://finance.sina.com.cn/jjxw/2025-02-01/doc-inehyqcx9694053.shtml
2. Wu, Zijing (14 March 2025). "DeepSeek focuses on research over revenue in contrast to Silicon Valley". Financial Times. Retrieved 14 March 2025. https://www.ft.com/content/fb5c11bb-1d4b-465f-8283-451a19a3d425
3. "Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd". Bloomberg L.P. https://www.bloomberg.com/profile/company/2544189D:CH
4. "DeepSeek Coder Model Service Agreement" (PDF). DeepSeek. 19 October 2023. https://chat.deepseek.com/downloads/DeepSeek%20Coder%20Model%20Service%20Agreement_1019.pdf
5. "DeepSeek Coder Privacy Policy" (PDF). DeepSeek. Retrieved 19 February 2025. https://chat.deepseek.com/downloads/DeepSeek%20Coder%20Privacy%20Policy_1019.pdf
6. "全国互联网安全管理平台" [National Internet Security Management Platform]. beian.mps.gov.cn. Retrieved 9 February 2025. https://beian.mps.gov.cn/#/query/webSearch?code=33010502011812
9. "Beijing puts spotlight on China's new face of AI, DeepSeek's Liang Wenfeng". South China Morning Post. 21 January 2025. Retrieved 4 March 2025. https://www.scmp.com/tech/policy/article/3295662/beijing-meeting-puts-spotlight-chinas-new-face-ai-deepseek-founder-liang-wenfeng
10. "Who is Liang Wenfeng, the founder of DeepSeek?". Reuters. Archived from the original on 19 February 2025. Retrieved 4 March 2025. https://www.reuters.com/technology/deepseek-founder-liang-wenfeng-puts-focus-chinese-innovation-2025-01-28/
11. "Behind DeepSeek lies a dazzling Chinese university". The Economist. ISSN 0013-0613. Archived from the original on 24 February 2025. Retrieved 5 March 2025. https://www.economist.com/china/2025/02/19/behind-deepseek-lies-a-dazzling-chinese-university
12. Gibney, Elizabeth (23 January 2025). "China's cheap, open AI model DeepSeek thrills scientists". Nature. 638 (8049): 13–14. Bibcode:2025Natur.638...13G. doi:10.1038/d41586-025-00229-6. ISSN 1476-4687. PMID 39849139. https://www.nature.com/articles/d41586-025-00229-6
13. Vincent, James (28 January 2025). "The DeepSeek panic reveals an AI world ready to blow". The Guardian. https://www.theguardian.com/commentisfree/2025/jan/28/deepseek-r1-ai-world-chinese-chatbot-tech-world-western
14. Metz, Cade; Tobin, Meaghan (23 January 2025). "How Chinese A.I. Start-Up DeepSeek Is Competing With Silicon Valley Giants". The New York Times. ISSN 0362-4331. Retrieved 27 January 2025. https://www.nytimes.com/2025/01/23/technology/deepseek-bd-ai-chips.html
15. Cosgrove, Emma (27 January 2025). "DeepSeek's cheaper models and weaker chips call into question trillions in AI infrastructure spending". Business Insider. https://www.businessinsider.com/explaining-deepseek-chinese-models-efficiency-scaring-markets-2025-1
16. Erdil, Ege (17 January 2025). "How has DeepSeek improved the Transformer architecture?". Epoch AI. Retrieved 3 February 2025. https://epoch.ai/gradient-updates/how-has-deepseek-improved-the-transformer-architecture
17. Metz, Cade (27 January 2025). "What is DeepSeek? And How Is It Upending A.I.?". The New York Times. ISSN 0362-4331. Retrieved 27 January 2025. https://www.nytimes.com/2025/01/27/technology/what-is-deepseek-china-ai.html
18. Roose, Kevin (28 January 2025). "Why DeepSeek Could Change What Silicon Valley Believe About A.I." The New York Times. ISSN 0362-4331. Retrieved 28 January 2025. https://www.nytimes.com/2025/01/28/technology/why-deepseek-could-change-what-silicon-valley-believes-about-ai.html
19. Delbert, Caroline (31 January 2025). "DeepSeek Is Cracking the 'Black Box' of Corporate AI Wide Open". Popular Mechanics. Retrieved 12 February 2025. https://www.popularmechanics.com/science/a63633889/deepseek-open-weight/
20. Gibney, Elizabeth (23 January 2025). "China's cheap, open AI model DeepSeek thrills scientists". Nature. 638 (8049): 13–14. Bibcode:2025Natur.638...13G. doi:10.1038/d41586-025-00229-6. PMID 39849139. Retrieved 12 February 2025. https://www.nature.com/articles/d41586-025-00229-6
21. Metz, Cade (12 February 2025). "How Did DeepSeek Build Its A.I. With Less Money?". The New York Times. Retrieved 21 March 2025. https://www.nytimes.com/2025/02/12/technology/deepseek-ai-chip-costs.html
22. Saah, Jasper (13 February 2025). "DeepSeek sends shock waves across Silicon Valley". Liberation News – The Newspaper of the Party for Socialism and Liberation. Retrieved 13 February 2025. https://liberationnews.org/deepseek-sends-shock-waves-across-silicon-valley/
23. Sillars, James (28 January 2025). "DeepSeek: Tech firm suffers biggest drop in US stock market history as low-cost Chinese AI company bites Silicon Valley". Sky News. Retrieved 13 February 2025. https://news.sky.com/story/deepseek-us-tech-stocks-tumble-on-fears-of-cheaper-chinese-ai-13297788
24. Chen, Caiwei (24 January 2025). "How a top Chinese AI model overcame US sanctions". MIT Technology Review. Archived from the original on 25 January 2025. Retrieved 25 January 2025. https://www.technologyreview.com/2025/01/24/1110526/china-deepseek-top-ai-despite-sanctions/
25. "幻方 | 幻方历程" [High-Flyer | The history of High-Flyer]. High-Flyer (in Chinese). Retrieved 2 February 2025. https://www.high-flyer.cn/history/
26. Ottinger, Lily (9 December 2024). "Deepseek: From Hedge Fund to Frontier Model Maker". ChinaTalk. Archived from the original on 28 December 2024. Retrieved 28 December 2024. https://www.chinatalk.media/p/deepseek-from-hedge-fund-to-frontier
27. Olcott, Eleanor; Wu, Zijing (24 January 2025). "How small Chinese AI start-up DeepSeek shocked Silicon Valley". Financial Times. Retrieved 31 January 2025. https://www.ft.com/content/747a7b11-dcba-4aa5-8d25-403f56216d7e
28. Leswing, Kif (23 February 2023). "Meet the $10,000 Nvidia chip powering the race for A.I." CNBC. Retrieved 30 January 2025. https://www.cnbc.com/2023/02/23/nvidias-a100-is-the-10000-chip-powering-the-race-for-ai-.html
29. "hfreduce | 高性能的多卡并行通信工具" [hfreduce | A high-performance multi-GPU parallel communication tool]. High-Flyer. 4 March 2020. Retrieved 3 February 2025. https://www.high-flyer.cn/blog/hf-reduce/
30. DeepSeek-AI; Liu, Aixin; Feng, Bei; Xue, Bing; Wang, Bingxuan; Wu, Bochao; Lu, Chengda; Zhao, Chenggang; Deng, Chengqi (27 December 2024). DeepSeek-V3 Technical Report. arXiv:2412.19437.
31. An, Wei; Bi, Xiao; Chen, Guanting; Chen, Shanhuang; Deng, Chengqi; Ding, Honghui; Dong, Kai; Du, Qiushi; Gao, Wenjun; Guan, Kang; Guo, Jianzhong; Guo, Yongqiang; Fu, Zhe; He, Ying; Huang, Panpan (17 November 2024). "Fire-Flyer AI-HPC: A Cost-Effective Software-Hardware Co-Design for Deep Learning". SC24: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE. pp. 1–23. arXiv:2408.14158. doi:10.1109/SC41406.2024.00089. ISBN 979-8-3503-5291-7. https://ieeexplore.ieee.org/document/10793193
32. "独家|幻方量化回应市场关注:AGI不是用来炒股的,"和金融没关系"" [Exclusive | High-Flyer Quant responds to market attention: AGI is not for trading stocks, "it has nothing to do with finance"]. Yicai. Retrieved 3 February 2025. https://www.yicai.com/news/101732215.html
33. Yu, Xu (17 April 2023). "[Exclusive] Chinese Quant Hedge Fund High-Flyer Won't Use AGI to Trade Stocks, MD Says". Yicai Global. Archived from the original on 31 December 2023. Retrieved 28 December 2024. https://www.yicaiglobal.com/news/exclusive-chinese-quant-fund-high-flyer-will-not-use-agi-to-trade-stocks-managing-director-says
34. Jiang, Ben; Perezi, Bien (1 January 2025). "Meet DeepSeek: the Chinese start-up that is changing how AI models are trained". South China Morning Post. Archived from the original on 22 January 2025. Retrieved 1 January 2025. https://www.scmp.com/tech/tech-trends/article/3293050/meet-deepseek-chinese-start-changing-how-ai-models-are-trained
35. McMorrow, Ryan; Olcott, Eleanor (9 June 2024). "The Chinese quant fund-turned-AI pioneer". Financial Times. Archived from the original on 17 July 2024. Retrieved 28 December 2024. https://www.ft.com/content/357f3c68-b866-4c2e-b678-0d075051a260
36. DeepSeek-AI; Bi, Xiao; Chen, Deli; Chen, Guanting; Chen, Shanhuang; Dai, Damai; Deng, Chengqi; Ding, Honghui; Dong, Kai (5 January 2024). DeepSeek LLM: Scaling Open-Source Language Models with Longtermism. arXiv:2401.02954.
37. Dai, Damai; Deng, Chengqi; Zhao, Chenggang; Xu, R. X.; Gao, Huazuo; Chen, Deli; Li, Jiashi; Zeng, Wangding; Yu, Xingkai (11 January 2024). DeepSeekMoE: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models. arXiv:2401.06066.
38. Shao, Zhihong; Wang, Peiyi; Zhu, Qihao; Xu, Runxin; Song, Junxiao; Bi, Xiao; Zhang, Haowei; Zhang, Mingchuan; Li, Y. K. (27 April 2024). DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models. arXiv:2402.03300.
39. DeepSeek-AI; Zhu, Qihao; Guo, Daya; Shao, Zhihong; Yang, Dejian; Wang, Peiyi; Xu, Runxin; Wu, Y.; Li, Yukun (17 June 2024). DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence. arXiv:2406.11931.
40. "deepseek-ai/DeepSeek-V2.5 · Hugging Face". Hugging Face. 3 January 2025. Retrieved 28 January 2025. https://huggingface.co/deepseek-ai/DeepSeek-V2.5
41. "Deepseek Log in page". DeepSeek. Retrieved 30 January 2025. https://chat.deepseek.com/sign_in
42. "News | DeepSeek-R1-Lite Release 2024/11/20: 🚀 DeepSeek-R1-Lite-Preview is now live: unleashing supercharged reasoning power!". DeepSeek API Docs. Archived from the original on 20 November 2024. Retrieved 28 January 2025. https://api-docs.deepseek.com/news/news1120
43. Field, Hayden (27 January 2025). "China's DeepSeek AI dethrones ChatGPT on App Store: Here's what you should know". CNBC. https://www.cnbc.com/2025/01/27/chinas-deepseek-ai-tops-chatgpt-app-store-what-you-should-know.html
44. Picchi, Aimee (27 January 2025). "What is DeepSeek, and why is it causing Nvidia and other stocks to slump?". CBS News. https://www.cbsnews.com/news/what-is-deepseek-ai-china-stock-nvidia-nvda-asml/
46. "大模型价格又砍一刀 这次"屠夫"竟是量化私募?" [Large-model prices take another cut; this time the "butcher" is a quant private fund?]. www.cls.cn. 10 May 2024. Retrieved 3 February 2025. https://www.cls.cn/detail/1672635
47. Schneider, Jordan (27 November 2024). "Deepseek: The Quiet Giant Leading China's AI Race". ChinaTalk. Retrieved 28 December 2024. https://www.chinatalk.media/p/deepseek-ceo-interview-with-chinas
48. "幻方力量 | 高速文件系统 3FS" [High-Flyer capability | High-speed file system 3FS]. High-Flyer. 13 June 2019. Retrieved 3 February 2025. https://www.high-flyer.cn/blog/3fs/
49. deepseek-ai/3FS. DeepSeek. 28 February 2025. Retrieved 28 February 2025. https://github.com/deepseek-ai/3FS
50. "HFAiLab/hai-platform". High-Flyer. 2 February 2025. Retrieved 3 February 2025. https://github.com/HFAiLab/hai-platform
51. DeepSeek-AI; Guo, Daya; Yang, Dejian; Zhang, Haowei; Song, Junxiao; Zhang, Ruoyu; Xu, Runxin; Zhu, Qihao; Ma, Shirong (22 January 2025). DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning. arXiv:2501.12948.
title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=DeepSeek-R1%3A+Incentivizing+Reasoning+Capability+in+LLMs+via+Reinforcement+Learning&rft.date=2025-01-22&rft_id=info%3Aarxiv%2F2501.12948&rft.au=DeepSeek-AI&rft.au=Guo%2C+Daya&rft.au=Yang%2C+Dejian&rft.au=Zhang%2C+Haowei&rft.au=Song%2C+Junxiao&rft.au=Zhang%2C+Ruoyu&rft.au=Xu%2C+Runxin&rft.au=Zhu%2C+Qihao&rft.au=Ma%2C+Shirong&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-52"><span class="mw-cite-backlink"><b><a href="#cite_ref-52">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGibney2025" class="citation journal cs1">Gibney, Elizabeth (23 January 2025). <a rel="nofollow" class="external text" href="https://www.nature.com/articles/d41586-025-00229-6">"China's cheap, open AI model DeepSeek thrills scientists"</a>. <i><a href="/wiki/Nature_(journal)" title="Nature (journal)">Nature</a></i>. <b>638</b> (8049): <span class="nowrap">13–</span>14. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2025Natur.638...13G">2025Natur.638...13G</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1038%2Fd41586-025-00229-6">10.1038/d41586-025-00229-6</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a> <a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/39849139">39849139</a><span class="reference-accessdate">. Retrieved <span class="nowrap">12 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Nature&rft.atitle=China%27s+cheap%2C+open+AI+model+DeepSeek+thrills+scientists&rft.volume=638&rft.issue=8049&rft.pages=%3Cspan+class%3D%22nowrap%22%3E13-%3C%2Fspan%3E14&rft.date=2025-01-23&rft_id=info%3Apmid%2F39849139&rft_id=info%3Adoi%2F10.1038%2Fd41586-025-00229-6&rft_id=info%3Abibcode%2F2025Natur.638...13G&rft.aulast=Gibney&rft.aufirst=Elizabeth&rft_id=https%3A%2F%2Fwww.nature.com%2Farticles%2Fd41586-025-00229-6&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-53"><span class="mw-cite-backlink"><b><a href="#cite_ref-53">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://github.com/deepseek-ai/DeepSeek-Coder/blob/main/LICENSE-MODEL">"DeepSeek-Coder/LICENSE-MODEL at main · deepseek-ai/DeepSeek-Coder"</a>. <i><a href="/wiki/GitHub" title="GitHub">GitHub</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20250122195853/https://github.com/deepseek-ai/deepseek-coder/blob/main/LICENSE-MODEL">Archived</a> from the original on 22 January 2025<span class="reference-accessdate">. 
Retrieved <span class="nowrap">24 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=GitHub&rft.atitle=DeepSeek-Coder%2FLICENSE-MODEL+at+main+%C2%B7+deepseek-ai%2FDeepSeek-Coder&rft_id=https%3A%2F%2Fgithub.com%2Fdeepseek-ai%2FDeepSeek-Coder%2Fblob%2Fmain%2FLICENSE-MODEL&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-Guo-2024-54"><span class="mw-cite-backlink">^ <a href="#cite_ref-Guo-2024_54-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Guo-2024_54-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-Guo-2024_54-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGuoZhuYangXie2024" class="citation cs2">Guo, Daya; Zhu, Qihao; Yang, Dejian; Xie, Zhenda; Dong, Kai; Zhang, Wentao; Chen, Guanting; Bi, Xiao; Wu, Y. (26 January 2024), <i>DeepSeek-Coder: When the Large Language Model Meets Programming – The Rise of Code Intelligence</i>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2401.14196">2401.14196</a></span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=DeepSeek-Coder%3A+When+the+Large+Language+Model+Meets+Programming+%E2%80%93+The+Rise+of+Code+Intelligence&rft.date=2024-01-26&rft_id=info%3Aarxiv%2F2401.14196&rft.aulast=Guo&rft.aufirst=Daya&rft.au=Zhu%2C+Qihao&rft.au=Yang%2C+Dejian&rft.au=Xie%2C+Zhenda&rft.au=Dong%2C+Kai&rft.au=Zhang%2C+Wentao&rft.au=Chen%2C+Guanting&rft.au=Bi%2C+Xiao&rft.au=Wu%2C+Y.&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-55"><span class="mw-cite-backlink"><b><a href="#cite_ref-55">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://deepseekcoder.github.io/">"DeepSeek Coder"</a>. <i>deepseekcoder.github.io</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=deepseekcoder.github.io&rft.atitle=DeepSeek+Coder&rft_id=https%3A%2F%2Fdeepseekcoder.github.io%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-56"><span class="mw-cite-backlink"><b><a href="#cite_ref-56">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation cs2"><a rel="nofollow" class="external text" href="https://github.com/deepseek-ai/deepseek-coder/"><i>deepseek-ai/DeepSeek-Coder</i></a>, DeepSeek, 27 January 2025<span class="reference-accessdate">, retrieved <span class="nowrap">27 January</span> 2025</span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=deepseek-ai%2FDeepSeek-Coder&rft.pub=DeepSeek&rft.date=2025-01-27&rft_id=https%3A%2F%2Fgithub.com%2Fdeepseek-ai%2Fdeepseek-coder%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-57"><span class="mw-cite-backlink"><b><a href="#cite_ref-57">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://huggingface.co/deepseek-ai/deepseek-coder-5.7bmqa-base">"deepseek-ai/deepseek-coder-5.7bmqa-base · Hugging Face"</a>. <i><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></i><span class="reference-accessdate">. Retrieved <span class="nowrap">27 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Hugging+Face&rft.atitle=deepseek-ai%2Fdeepseek-coder-5.7bmqa-base+%C2%B7+Hugging+Face&rft_id=https%3A%2F%2Fhuggingface.co%2Fdeepseek-ai%2Fdeepseek-coder-5.7bmqa-base&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-59"><span class="mw-cite-backlink"><b><a href="#cite_ref-59">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation cs2"><a rel="nofollow" class="external text" href="https://github.com/deepseek-ai/DeepSeek-LLM"><i>deepseek-ai/DeepSeek-LLM</i></a>, DeepSeek, 27 January 2025<span class="reference-accessdate">, retrieved <span class="nowrap">27 January</span> 2025</span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=deepseek-ai%2FDeepSeek-LLM&rft.pub=DeepSeek&rft.date=2025-01-27&rft_id=https%3A%2F%2Fgithub.com%2Fdeepseek-ai%2FDeepSeek-LLM&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-60"><span class="mw-cite-backlink"><b><a href="#cite_ref-60">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWangLiShaoXu2024" class="citation cs2">Wang, Peiyi; Li, Lei; Shao, Zhihong; Xu, R. 
X.; Dai, Damai; Li, Yifei; Chen, Deli; Wu, Y.; Sui, Zhifang (19 February 2024), <i>Math-Shepherd: Verify and Reinforce LLMs Step-by-step without Human Annotations</i>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2312.08935">2312.08935</a></span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Math-Shepherd%3A+Verify+and+Reinforce+LLMs+Step-by-step+without+Human+Annotations&rft.date=2024-02-19&rft_id=info%3Aarxiv%2F2312.08935&rft.aulast=Wang&rft.aufirst=Peiyi&rft.au=Li%2C+Lei&rft.au=Shao%2C+Zhihong&rft.au=Xu%2C+R.+X.&rft.au=Dai%2C+Damai&rft.au=Li%2C+Yifei&rft.au=Chen%2C+Deli&rft.au=Wu%2C+Y.&rft.au=Sui%2C+Zhifang&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span>.</span> </li> <li id="cite_note-Ruan,_Chong-2024-61"><span class="mw-cite-backlink">^ <a href="#cite_ref-Ruan,_Chong-2024_61-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Ruan,_Chong-2024_61-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-Ruan,_Chong-2024_61-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-Ruan,_Chong-2024_61-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-Ruan,_Chong-2024_61-4"><sup><i><b>e</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFDeepSeek-AILiuFengWang2024" class="citation cs2">DeepSeek-AI; Liu, Aixin; Feng, Bei; Wang, Bin; Wang, Bingxuan; Liu, Bo; Zhao, Chenggang; Dengr, Chengqi; Ruan, Chong (19 June 2024), <i>DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model</i>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2405.04434">2405.04434</a></span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=DeepSeek-V2%3A+A+Strong%2C+Economical%2C+and+Efficient+Mixture-of-Experts+Language+Model&rft.date=2024-06-19&rft_id=info%3Aarxiv%2F2405.04434&rft.au=DeepSeek-AI&rft.au=Liu%2C+Aixin&rft.au=Feng%2C+Bei&rft.au=Wang%2C+Bin&rft.au=Wang%2C+Bingxuan&rft.au=Liu%2C+Bo&rft.au=Zhao%2C+Chenggang&rft.au=Dengr%2C+Chengqi&rft.au=Ruan%2C+Chong&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span>.</span> </li> <li id="cite_note-Peng-2023-62"><span class="mw-cite-backlink">^ <a href="#cite_ref-Peng-2023_62-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Peng-2023_62-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFPengQuesnelleFanShippole2023" class="citation cs2">Peng, Bowen; Quesnelle, Jeffrey; Fan, Honglu; Shippole, Enrico (1 November 2023), <i>YaRN: Efficient Context Window Extension of Large Language Models</i>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2309.00071">2309.00071</a></span></cite><span 
title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=YaRN%3A+Efficient+Context+Window+Extension+of+Large+Language+Models&rft.date=2023-11-01&rft_id=info%3Aarxiv%2F2309.00071&rft.aulast=Peng&rft.aufirst=Bowen&rft.au=Quesnelle%2C+Jeffrey&rft.au=Fan%2C+Honglu&rft.au=Shippole%2C+Enrico&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span>.</span> </li> <li id="cite_note-63"><span class="mw-cite-backlink"><b><a href="#cite_ref-63">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/blob/main/config.json">"config.json · deepseek-ai/DeepSeek-V2-Lite at main"</a>. <i><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></i>. 15 May 2024<span class="reference-accessdate">. Retrieved <span class="nowrap">28 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Hugging+Face&rft.atitle=config.json+%C2%B7+deepseek-ai%2FDeepSeek-V2-Lite+at+main&rft.date=2024-05-15&rft_id=https%3A%2F%2Fhuggingface.co%2Fdeepseek-ai%2FDeepSeek-V2-Lite%2Fblob%2Fmain%2Fconfig.json&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-64"><span class="mw-cite-backlink"><b><a href="#cite_ref-64">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://huggingface.co/deepseek-ai/DeepSeek-V2/blob/main/config.json">"config.json · deepseek-ai/DeepSeek-V2 at main"</a>. <i><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></i>. 6 May 2024<span class="reference-accessdate">. Retrieved <span class="nowrap">28 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Hugging+Face&rft.atitle=config.json+%C2%B7+deepseek-ai%2FDeepSeek-V2+at+main&rft.date=2024-05-06&rft_id=https%3A%2F%2Fhuggingface.co%2Fdeepseek-ai%2FDeepSeek-V2%2Fblob%2Fmain%2Fconfig.json&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-66"><span class="mw-cite-backlink"><b><a href="#cite_ref-66">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/config.json">"config.json · deepseek-ai/DeepSeek-V3 at main"</a>. <i><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></i>. 26 December 2024<span class="reference-accessdate">. 
Retrieved <span class="nowrap">28 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Hugging+Face&rft.atitle=config.json+%C2%B7+deepseek-ai%2FDeepSeek-V3+at+main&rft.date=2024-12-26&rft_id=https%3A%2F%2Fhuggingface.co%2Fdeepseek-ai%2FDeepSeek-V3%2Fblob%2Fmain%2Fconfig.json&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-67"><span class="mw-cite-backlink"><b><a href="#cite_ref-67">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFPatelKourabiO'LaughlinKnuhtsen2025" class="citation web cs1">Patel, Dylan; Kourabi, AJ; O'Laughlin, Dylan; Knuhtsen, Doug (31 January 2025). <a rel="nofollow" class="external text" href="https://semianalysis.com/2025/01/31/deepseek-debates/">"DeepSeek Debates: Chinese Leadership On Cost, True Training Cost, Closed Model Margin Impacts"</a>. <i>SemiAnalysis</i><span class="reference-accessdate">. Retrieved <span class="nowrap">13 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=SemiAnalysis&rft.atitle=DeepSeek+Debates%3A+Chinese+Leadership+On+Cost%2C+True+Training+Cost%2C+Closed+Model+Margin+Impacts&rft.date=2025-01-31&rft.aulast=Patel&rft.aufirst=Dylan&rft.au=Kourabi%2C+AJ&rft.au=O%27Laughlin%2C+Dylan&rft.au=Knuhtsen%2C+Doug&rft_id=https%3A%2F%2Fsemianalysis.com%2F2025%2F01%2F31%2Fdeepseek-debates%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-68"><span class="mw-cite-backlink"><b><a href="#cite_ref-68">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFThubron2025" class="citation web cs1">Thubron, Rob (3 February 2025). <a rel="nofollow" class="external text" href="https://www.techspot.com/news/106612-deepseek-ai-costs-far-exceed-55-million-claim.html">"DeepSeek's AI costs far exceed $5.5 million claim, may have reached $1.6 billion with 50,000 Nvidia GPUs"</a>. <i>TechSpot</i><span class="reference-accessdate">. Retrieved <span class="nowrap">13 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=TechSpot&rft.atitle=DeepSeek%27s+AI+costs+far+exceed+%245.5+million+claim%2C+may+have+reached+%241.6+billion+with+50%2C000+Nvidia+GPUs&rft.date=2025-02-03&rft.aulast=Thubron&rft.aufirst=Rob&rft_id=https%3A%2F%2Fwww.techspot.com%2Fnews%2F106612-deepseek-ai-costs-far-exceed-55-million-claim.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-69"><span class="mw-cite-backlink"><b><a href="#cite_ref-69">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKajal2025" class="citation web cs1">Kajal, Kapil (31 January 2025). <a rel="nofollow" class="external text" href="https://www.yahoo.com/news/research-exposes-deepseek-ai-training-165025904.html">"Research exposes DeepSeek's AI training cost is not $6M, it's a staggering $1.3B"</a>. <i><a href="/wiki/Yahoo_News" title="Yahoo News">Yahoo News</a></i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">13 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Yahoo+News&rft.atitle=Research+exposes+DeepSeek%27s+AI+training+cost+is+not+%246M%2C+it%27s+a+staggering+%241.3B&rft.date=2025-01-31&rft.aulast=Kajal&rft.aufirst=Kapil&rft_id=https%3A%2F%2Fwww.yahoo.com%2Fnews%2Fresearch-exposes-deepseek-ai-training-165025904.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-70"><span class="mw-cite-backlink"><b><a href="#cite_ref-70">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://therecursive.com/martin-vechev-of-insait-deepseek-6m-cost-of-training-is-misleading/">"Martin Vechev of INSAIT: "DeepSeek $6M Cost Of Training Is Misleading"<span class="cs1-kern-right"></span>"</a>. <i>TheRecursive.com</i>. 28 January 2025<span class="reference-accessdate">. Retrieved <span class="nowrap">13 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=TheRecursive.com&rft.atitle=Martin+Vechev+of+INSAIT%3A+%22DeepSeek+%246M+Cost+Of+Training+Is+Misleading%22&rft.date=2025-01-28&rft_id=https%3A%2F%2Ftherecursive.com%2Fmartin-vechev-of-insait-deepseek-6m-cost-of-training-is-misleading%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-71"><span class="mw-cite-backlink"><b><a href="#cite_ref-71">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFJiang2024" class="citation web cs1">Jiang, Ben (27 December 2024). <a rel="nofollow" class="external text" href="https://www.scmp.com/tech/tech-trends/article/3292507/chinese-start-deepseek-launches-ai-model-outperforms-meta-openai-products">"Chinese start-up DeepSeek's new AI model outperforms Meta, OpenAI products"</a>. <i><a href="/wiki/South_China_Morning_Post" title="South China Morning Post">South China Morning Post</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20241227191529/https://www.scmp.com/tech/tech-trends/article/3292507/chinese-start-deepseek-launches-ai-model-outperforms-meta-openai-products">Archived</a> from the original on 27 December 2024<span class="reference-accessdate">. Retrieved <span class="nowrap">28 December</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=South+China+Morning+Post&rft.atitle=Chinese+start-up+DeepSeek%27s+new+AI+model+outperforms+Meta%2C+OpenAI+products&rft.date=2024-12-27&rft.aulast=Jiang&rft.aufirst=Ben&rft_id=https%3A%2F%2Fwww.scmp.com%2Ftech%2Ftech-trends%2Farticle%2F3292507%2Fchinese-start-deepseek-launches-ai-model-outperforms-meta-openai-products&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-72"><span class="mw-cite-backlink"><b><a href="#cite_ref-72">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSharma2024" class="citation web cs1">Sharma, Shubham (26 December 2024). 
<a rel="nofollow" class="external text" href="https://venturebeat.com/ai/deepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch/">"DeepSeek-V3, ultra-large open-source AI, outperforms Llama and Qwen on launch"</a>. <i><a href="/wiki/VentureBeat" title="VentureBeat">VentureBeat</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20241227195503/https://venturebeat.com/ai/deepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch/">Archived</a> from the original on 27 December 2024<span class="reference-accessdate">. Retrieved <span class="nowrap">28 December</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=VentureBeat&rft.atitle=DeepSeek-V3%2C+ultra-large+open-source+AI%2C+outperforms+Llama+and+Qwen+on+launch&rft.date=2024-12-26&rft.aulast=Sharma&rft.aufirst=Shubham&rft_id=https%3A%2F%2Fventurebeat.com%2Fai%2Fdeepseek-v3-ultra-large-open-source-ai-outperforms-llama-and-qwen-on-launch%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-73"><span class="mw-cite-backlink"><b><a href="#cite_ref-73">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWiggers2024" class="citation web cs1">Wiggers, Kyle (26 December 2024). <a rel="nofollow" class="external text" href="https://techcrunch.com/2024/12/26/deepseeks-new-ai-model-appears-to-be-one-of-the-best-open-challengers-yet/">"DeepSeek's new AI model appears to be one of the best 'open' challengers yet"</a>. <i><a href="/wiki/TechCrunch" title="TechCrunch">TechCrunch</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20250102103526/https://techcrunch.com/2024/12/26/deepseeks-new-ai-model-appears-to-be-one-of-the-best-open-challengers-yet/">Archived</a> from the original on 2 January 2025<span class="reference-accessdate">. Retrieved <span class="nowrap">31 December</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=TechCrunch&rft.atitle=DeepSeek%27s+new+AI+model+appears+to+be+one+of+the+best+%27open%27+challengers+yet&rft.date=2024-12-26&rft.aulast=Wiggers&rft.aufirst=Kyle&rft_id=https%3A%2F%2Ftechcrunch.com%2F2024%2F12%2F26%2Fdeepseeks-new-ai-model-appears-to-be-one-of-the-best-open-challengers-yet%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-74"><span class="mw-cite-backlink"><b><a href="#cite_ref-74">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFEdwards2025" class="citation web cs1">Edwards, Benj (21 January 2025). <a rel="nofollow" class="external text" href="https://arstechnica.com/ai/2025/01/china-is-catching-up-with-americas-best-reasoning-ai-models/">"Cutting-edge Chinese "reasoning" model rivals OpenAI o1—and it's free to download"</a>. <i><a href="/wiki/Ars_Technica" title="Ars Technica">Ars Technica</a></i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">16 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Ars+Technica&rft.atitle=Cutting-edge+Chinese+%22reasoning%22+model+rivals+OpenAI+o1%E2%80%94and+it%27s+free+to+download&rft.date=2025-01-21&rft.aulast=Edwards&rft.aufirst=Benj&rft_id=https%3A%2F%2Farstechnica.com%2Fai%2F2025%2F01%2Fchina-is-catching-up-with-americas-best-reasoning-ai-models%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-76"><span class="mw-cite-backlink"><b><a href="#cite_ref-76">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFFranzen2024" class="citation web cs1">Franzen, Carl (20 November 2024). <a rel="nofollow" class="external text" href="https://venturebeat.com/ai/deepseeks-first-reasoning-model-r1-lite-preview-turns-heads-beating-openai-o1-performance/">"DeepSeek's first reasoning model R1-Lite-Preview turns heads, beating OpenAI o1 performance"</a>. <i><a href="/wiki/VentureBeat" title="VentureBeat">VentureBeat</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20241122010413/https://venturebeat.com/ai/deepseeks-first-reasoning-model-r1-lite-preview-turns-heads-beating-openai-o1-performance/">Archived</a> from the original on 22 November 2024<span class="reference-accessdate">. Retrieved <span class="nowrap">28 December</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=VentureBeat&rft.atitle=DeepSeek%27s+first+reasoning+model+R1-Lite-Preview+turns+heads%2C+beating+OpenAI+o1+performance&rft.date=2024-11-20&rft.aulast=Franzen&rft.aufirst=Carl&rft_id=https%3A%2F%2Fventurebeat.com%2Fai%2Fdeepseeks-first-reasoning-model-r1-lite-preview-turns-heads-beating-openai-o1-performance%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-77"><span class="mw-cite-backlink"><b><a href="#cite_ref-77">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHuang2024" class="citation web cs1">Huang, Raffaele (24 December 2024). <a rel="nofollow" class="external text" href="https://www.wsj.com/tech/ai/china-ai-advances-us-chips-7838fd20">"Don't Look Now, but China's AI Is Catching Up Fast"</a>. <i><a href="/wiki/The_Wall_Street_Journal" title="The Wall Street Journal">The Wall Street Journal</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20241227183842/https://www.wsj.com/tech/ai/china-ai-advances-us-chips-7838fd20">Archived</a> from the original on 27 December 2024<span class="reference-accessdate">. 
Retrieved <span class="nowrap">28 December</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=The+Wall+Street+Journal&rft.atitle=Don%27t+Look+Now%2C+but+China%27s+AI+Is+Catching+Up+Fast&rft.date=2024-12-24&rft.aulast=Huang&rft.aufirst=Raffaele&rft_id=https%3A%2F%2Fwww.wsj.com%2Ftech%2Fai%2Fchina-ai-advances-us-chips-7838fd20&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-78"><span class="mw-cite-backlink"><b><a href="#cite_ref-78">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://github.com/deepseek-ai/DeepSeek-R1/commit/23807ced51627276434655dd9f27725354818974">"Release DeepSeek-R1 · deepseek-ai/DeepSeek-R1@23807ce"</a>. <i><a href="/wiki/GitHub" title="GitHub">GitHub</a></i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20250121104009/https://github.com/deepseek-ai/DeepSeek-R1/commit/23807ced51627276434655dd9f27725354818974">Archived</a> from the original on 21 January 2025<span class="reference-accessdate">. Retrieved <span class="nowrap">21 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=GitHub&rft.atitle=Release+DeepSeek-R1+%C2%B7+deepseek-ai%2FDeepSeek-R1%4023807ce&rft_id=https%3A%2F%2Fgithub.com%2Fdeepseek-ai%2FDeepSeek-R1%2Fcommit%2F23807ced51627276434655dd9f27725354818974&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-79"><span class="mw-cite-backlink"><b><a href="#cite_ref-79">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFEduardo_BaptistaJulie_ZhuFanny_Potkin2025" class="citation web cs1">Eduardo Baptista; Julie Zhu; Fanny Potkin (25 February 2025). <a rel="nofollow" class="external text" href="https://www.reuters.com/technology/artificial-intelligence/deepseek-rushes-launch-new-ai-model-china-goes-all-2025-02-25/">"DeepSeek rushes to launch new AI model as China goes all in"</a>. <i>Reuters</i><span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Reuters&rft.atitle=DeepSeek+rushes+to+launch+new+AI+model+as+China+goes+all+in&rft.date=2025-02-25&rft.au=Eduardo+Baptista&rft.au=Julie+Zhu&rft.au=Fanny+Potkin&rft_id=https%3A%2F%2Fwww.reuters.com%2Ftechnology%2Fartificial-intelligence%2Fdeepseek-rushes-launch-new-ai-model-china-goes-all-2025-02-25%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-80"><span class="mw-cite-backlink"><b><a href="#cite_ref-80">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFRoose2025" class="citation news cs1">Roose, Kevin (28 January 2025). 
<a rel="nofollow" class="external text" href="https://www.nytimes.com/2025/01/28/technology/why-deepseek-could-change-what-silicon-valley-believes-about-ai.html">"Why DeepSeek Could Change What Silicon Valley Believe About A.I."</a> <i><a href="/wiki/The_New_York_Times" title="The New York Times">The New York Times</a></i>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0362-4331">0362-4331</a><span class="reference-accessdate">. Retrieved <span class="nowrap">28 January</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=The+New+York+Times&rft.atitle=Why+DeepSeek+Could+Change+What+Silicon+Valley+Believe+About+A.I.&rft.date=2025-01-28&rft.issn=0362-4331&rft.aulast=Roose&rft.aufirst=Kevin&rft_id=https%3A%2F%2Fwww.nytimes.com%2F2025%2F01%2F28%2Ftechnology%2Fwhy-deepseek-could-change-what-silicon-valley-believes-about-ai.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-81"><span class="mw-cite-backlink"><b><a href="#cite_ref-81">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGibney2025" class="citation journal cs1">Gibney, Elizabeth (23 January 2025). <a rel="nofollow" class="external text" href="https://www.nature.com/articles/d41586-025-00229-6">"China's cheap, open AI model DeepSeek thrills scientists"</a>. <i><a href="/wiki/Nature_(journal)" title="Nature (journal)">Nature</a></i>. <b>638</b> (8049): <span class="nowrap">13–</span>14. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2025Natur.638...13G">2025Natur.638...13G</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1038%2Fd41586-025-00229-6">10.1038/d41586-025-00229-6</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1476-4687">1476-4687</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a> <a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/39849139">39849139</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Nature&rft.atitle=China%27s+cheap%2C+open+AI+model+DeepSeek+thrills+scientists&rft.volume=638&rft.issue=8049&rft.pages=%3Cspan+class%3D%22nowrap%22%3E13-%3C%2Fspan%3E14&rft.date=2025-01-23&rft_id=info%3Adoi%2F10.1038%2Fd41586-025-00229-6&rft.issn=1476-4687&rft_id=info%3Apmid%2F39849139&rft_id=info%3Abibcode%2F2025Natur.638...13G&rft.aulast=Gibney&rft.aufirst=Elizabeth&rft_id=https%3A%2F%2Fwww.nature.com%2Farticles%2Fd41586-025-00229-6&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> <li id="cite_note-Chow_Perrigo-82"><span class="mw-cite-backlink"><b><a href="#cite_ref-Chow_Perrigo_82-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFChowPerrigo2025" class="citation news cs1">Chow, Andrew R.; Perrigo, Billy (30 January 2025). 
<a rel="nofollow" class="external text" href="https://time.com/7211646/is-deepseek-panic-overblown/">"Is the DeepSeek Panic Overblown?"</a>. <i>TIME</i><span class="reference-accessdate">. Retrieved <span class="nowrap">17 March</span> 2025</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=TIME&rft.atitle=Is+the+DeepSeek+Panic+Overblown%3F&rft.date=2025-01-30&rft.aulast=Chow&rft.aufirst=Andrew+R.&rft.au=Perrigo%2C+Billy&rft_id=https%3A%2F%2Ftime.com%2F7211646%2Fis-deepseek-panic-overblown%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3ADeepSeek" class="Z3988"></span></span> </li> </ol></div> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=DeepSeek&action=edit&section=20" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1235681985">.mw-parser-output .side-box{margin:4px 0;box-sizing:border-box;border:1px solid #aaa;font-size:88%;line-height:1.25em;background-color:var(--background-color-interactive-subtle,#f8f9fa);display:flow-root}.mw-parser-output .side-box-abovebelow,.mw-parser-output .side-box-text{padding:0.25em 0.9em}.mw-parser-output .side-box-image{padding:2px 0 2px 0.9em;text-align:center}.mw-parser-output .side-box-imageright{padding:2px 0.9em 2px 0;text-align:center}@media(min-width:500px){.mw-parser-output .side-box-flex{display:flex;align-items:center}.mw-parser-output .side-box-text{flex:1;min-width:0}}@media(min-width:720px){.mw-parser-output .side-box{width:238px}.mw-parser-output .side-box-right{clear:right;float:right;margin-left:1em}.mw-parser-output .side-box-left{margin-right:1em}}</style><style data-mw-deduplicate="TemplateStyles:r1237033735">@media print{body.ns-0 .mw-parser-output .sistersitebox{display:none!important}}@media screen{html.skin-theme-clientpref-night .mw-parser-output .sistersitebox img[src*="Wiktionary-logo-en-v2.svg"]{background-color:white}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .sistersitebox img[src*="Wiktionary-logo-en-v2.svg"]{background-color:white}}</style><div class="side-box side-box-right plainlinks sistersitebox"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1126788409" /> <div class="side-box-flex"> <div class="side-box-image"><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Commons-logo.svg" class="mw-file-description"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/40px-Commons-logo.svg.png" decoding="async" width="30" height="40" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/60px-Commons-logo.svg.png 1.5x" data-file-width="1024" data-file-height="1376" /></a></span></div> <div class="side-box-text plainlist">Wikimedia Commons has media related to <span style="font-weight: bold; font-style: italic;"><a href="https://commons.wikimedia.org/wiki/Category:DeepSeek" class="extiw" title="commons:Category:DeepSeek">DeepSeek</a></span>.</div></div> </div> <ul><li><span class="official-website"><span class="url"><a rel="nofollow" class="external text" href="https://www.deepseek.com">Official website</a></span></span> <span class="mw-valign-text-top" typeof="mw:File/Frameless"><a href="https://www.wikidata.org/wiki/Q131577453#P856" title="Edit this at Wikidata"><img 
alt="Edit this at Wikidata" src="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png" decoding="async" width="10" height="10" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/15px-OOjs_UI_icon_edit-ltr-progressive.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/20px-OOjs_UI_icon_edit-ltr-progressive.svg.png 2x" data-file-width="20" data-file-height="20" /></a></span></li> <li><a rel="nofollow" class="external text" href="https://github.com/deepseek-ai">DeepSeek</a> on <a href="/wiki/GitHub" title="GitHub">GitHub</a></li> <li><a rel="nofollow" class="external text" href="https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210">DeepSeek</a> on <a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></li> <li><a rel="nofollow" class="external text" href="https://api-docs.deepseek.com/">Official API documentation</a></li> <li><a rel="nofollow" class="external text" href="https://huggingface.co/collections/Presidentlin/deepseek-papers-674c536aa6acddd9bc98c2ac">Anthology of DeepSeek papers</a></li> <li><a rel="nofollow" class="external text" href="https://www.high-flyer.cn/blog/">Research blog of High-Flyer</a></li></ul> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" 
"counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Generative_AI_chatbots167" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output 
.navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Generative_AI_chatbots" title="Template:Generative AI chatbots"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Generative_AI_chatbots" title="Template talk:Generative AI chatbots"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Generative_AI_chatbots" title="Special:EditPage/Template:Generative AI chatbots"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Generative_AI_chatbots167" style="font-size:114%;margin:0 4em"><a href="/wiki/Generative_AI" class="mw-redirect" title="Generative AI">Generative AI</a> <a href="/wiki/Chatbot" title="Chatbot">chatbots</a></div></th></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li></ul> </div></td></tr><tr><td colspan="2" class="navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/Character.ai" title="Character.ai">Character.ai</a></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek</a></li> <li><a href="/wiki/Ernie_Bot" title="Ernie Bot">ERNIE</a></li> <li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">Gemini</a></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/Microsoft_Copilot" title="Microsoft Copilot">Copilot</a></li> <li><a href="/wiki/Minerva_AI" class="mw-redirect" title="Minerva AI">Minerva</a></li> <li><a href="/wiki/Mistral_AI" title="Mistral AI">Mistral</a></li> <li><a href="/wiki/Perplexity_AI" title="Perplexity AI">Perplexity AI</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li> <li><a href="/wiki/Replika" title="Replika">Replika</a></li> <li><a href="/wiki/Velvet_AI" title="Velvet AI">Velvet</a></li> <li><a href="/wiki/YandexGPT" title="YandexGPT">YandexGPT</a></li> <li><a href="/wiki/You.com" title="You.com">You.com</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <a href="/wiki/Category:Chatbots" title="Category:Chatbots">Category</a></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235" 
/></div><div role="navigation" class="navbox" aria-labelledby="Generative_AI409" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Generative_AI" title="Template:Generative AI"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Generative_AI" title="Template talk:Generative AI"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Generative_AI" title="Special:EditPage/Template:Generative AI"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Generative_AI409" style="font-size:114%;margin:0 4em"><a href="/wiki/Generative_artificial_intelligence" title="Generative artificial intelligence">Generative AI</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">Generative pre-trained transformer</a></li> <li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Neural network</a></li> <li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Retrieval-augmented_generation" title="Retrieval-augmented generation">Retrieval-augmented generation</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">Reinforcement learning from human feedback</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder</a></li> <li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Models</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/DBRX" 
title="DBRX">DBRX</a></li> <li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek</a></li> <li><a href="/wiki/Ernie_Bot" title="Ernie Bot">ERNIE</a></li> <li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">Gemini</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a href="/wiki/GPT-2" title="GPT-2">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/GPT-4.5" title="GPT-4.5">4.5</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Mistral_AI#Mistral_Large" title="Mistral AI">Mistral Large</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text-to-image_model" title="Text-to-image model">Image</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Aurora_(text-to-image_model)" class="mw-redirect" title="Aurora (text-to-image model)">Aurora</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Imagen_(text-to-image_model)" title="Imagen (text-to-image model)">Imagen</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Speech</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/15.ai" title="15.ai">15.ai</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text-to-video_model" title="Text-to-video model">Video</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/Runway_(company)#Gen-3_Alpha" title="Runway (company)">Gen-3 Alpha</a></li> <li><a href="/wiki/MiniMax_(company)#Hailuo_AI" title="MiniMax (company)">Hailuo AI</a></li> <li><a href="/wiki/Kling_(text-to-video_model)" class="mw-redirect" title="Kling (text-to-video model)">Kling</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Google_DeepMind#Video_model" title="Google 
DeepMind">Veo</a></li> <li><a href="/wiki/VideoPoet" title="VideoPoet">VideoPoet</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Music</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Udio" title="Udio">Udio</a></li> <li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/List_of_artificial_intelligence_companies" title="List of artificial intelligence companies">Companies</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/01.AI" title="01.AI">01.AI</a></li> <li><a href="/wiki/Alibaba_Group" title="Alibaba Group">Alibaba</a></li> <li><a href="/wiki/Anthropic" title="Anthropic">Anthropic</a></li> <li><a href="/wiki/Baichuan" title="Baichuan">Baichuan</a></li> <li><a href="/wiki/Baidu" title="Baidu">Baidu</a></li> <li><a class="mw-selflink selflink">DeepSeek</a></li> <li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li> <li><a href="/wiki/Google_DeepMind" title="Google DeepMind">Google DeepMind</a></li> <li><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></li> <li><a href="/wiki/Kuaishou" title="Kuaishou">Kuaishou</a></li> <li><a href="/wiki/Meta_AI" title="Meta AI">Meta AI</a></li> <li><a href="/wiki/MiniMax_(company)" title="MiniMax (company)">MiniMax</a></li> <li><a href="/wiki/Mistral_AI" title="Mistral AI">Mistral AI</a></li> <li><a href="/wiki/Moonshot_AI" title="Moonshot AI">Moonshot AI</a></li> <li><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></li> <li><a href="/wiki/Runway_(company)" title="Runway (company)">Runway</a></li> <li><a href="/wiki/Stability_AI" title="Stability AI">Stability AI</a></li> <li><a href="/wiki/Synthesia_(company)" title="Synthesia (company)">Synthesia</a></li> <li><a href="/wiki/XAI_(company)" title="XAI (company)">xAI</a></li> <li><a href="/wiki/Zhipu_AI" title="Zhipu AI">Zhipu AI</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <b><a href="/wiki/Category:Generative_artificial_intelligence" title="Category:Generative artificial intelligence">Category</a></b></li> <li><span class="noviewer" typeof="mw:File"><span title="Commons page"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/20px-Commons-logo.svg.png" decoding="async" width="12" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/40px-Commons-logo.svg.png 2x" data-file-width="1024" data-file-height="1376" /></span></span> <b><a href="https://commons.wikimedia.org/wiki/Category:Generative_artificial_intelligence" class="extiw" title="commons:Category:Generative artificial intelligence">Commons</a></b></li></ul> 
Authority control databases: National: Germany (GND 1355055415)
Retrieved from "https://en.wikipedia.org/w/index.php?title=DeepSeek&oldid=1281555935"

Categories: 2023 establishments in China · Artificial intelligence companies · Artificial intelligence laboratories · Companies based in Hangzhou · Technology companies established in 2023 · Chinese brands

This page was last edited on 21 March 2025, at 02:04 (UTC). Text is available under the Creative Commons Attribution-ShareAlike 4.0 License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
<input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <div class="vector-sticky-header-context-bar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-sticky-header-toc" class="vector-dropdown mw-portlet mw-portlet-sticky-header-toc vector-sticky-header-toc vector-button-flush-left" > <input type="checkbox" id="vector-sticky-header-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-sticky-header-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-sticky-header-toc-label" for="vector-sticky-header-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-sticky-header-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div class="vector-sticky-header-context-bar-primary" aria-hidden="true" ><span class="mw-page-title-main">DeepSeek</span></div> </div> </div> <div class="vector-sticky-header-end" aria-hidden="true"> <div class="vector-sticky-header-icons"> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-talk-sticky-header" tabindex="-1" data-event-name="talk-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbles mw-ui-icon-wikimedia-speechBubbles"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-subject-sticky-header" tabindex="-1" data-event-name="subject-sticky-header"><span class="vector-icon mw-ui-icon-article mw-ui-icon-wikimedia-article"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-history-sticky-header" tabindex="-1" data-event-name="history-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-history mw-ui-icon-wikimedia-wikimedia-history"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only mw-watchlink" id="ca-watchstar-sticky-header" tabindex="-1" data-event-name="watch-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-star mw-ui-icon-wikimedia-wikimedia-star"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-edit-sticky-header" tabindex="-1" data-event-name="wikitext-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-wikiText mw-ui-icon-wikimedia-wikimedia-wikiText"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-ve-edit-sticky-header" tabindex="-1" data-event-name="ve-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-edit mw-ui-icon-wikimedia-wikimedia-edit"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button 
cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-viewsource-sticky-header" tabindex="-1" data-event-name="ve-edit-protected-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-editLock mw-ui-icon-wikimedia-wikimedia-editLock"></span> <span></span> </a> </div> <div class="vector-sticky-header-buttons"> <button class="cdx-button cdx-button--weight-quiet mw-interlanguage-selector" id="p-lang-btn-sticky-header" tabindex="-1" data-event-name="ui.dropdown-p-lang-btn-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-language mw-ui-icon-wikimedia-wikimedia-language"></span> <span>68 languages</span> </button> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive" id="ca-addsection-sticky-header" tabindex="-1" data-event-name="addsection-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbleAdd-progressive mw-ui-icon-wikimedia-speechBubbleAdd-progressive"></span> <span>Add topic</span> </a> </div> <div class="vector-sticky-header-icon-end"> <div class="vector-user-links"> </div> </div> </div> </div> </div> <div class="mw-portlet mw-portlet-dock-bottom emptyPortlet" id="p-dock-bottom"> <ul> </ul> </div> <script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.eqiad.main-75687f9f4b-pt2zn","wgBackendResponseTime":209,"wgPageParseReport":{"limitreport":{"cputime":"1.147","walltime":"1.471","ppvisitednodes":{"value":10899,"limit":1000000},"postexpandincludesize":{"value":221108,"limit":2097152},"templateargumentsize":{"value":9952,"limit":2097152},"expansiondepth":{"value":20,"limit":100},"expensivefunctioncount":{"value":12,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":324805,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 1238.704 1 -total"," 34.93% 432.731 3 Template:Reflist"," 18.71% 231.812 1 Template:Infobox_company"," 18.01% 223.062 1 Template:Infobox"," 17.14% 212.310 43 Template:Cite_web"," 7.39% 91.503 1 Template:Lang"," 6.66% 82.501 15 Template:Citation"," 6.37% 78.863 3 Template:Navbox"," 6.05% 74.972 1 Template:Generative_AI_chatbots"," 5.34% 66.165 1 Template:Short_description"]},"scribunto":{"limitreport-timeusage":{"value":"0.695","limit":"10.000"},"limitreport-memusage":{"value":19884055,"limit":52428800}},"cachereport":{"origin":"mw-web.eqiad.main-75687f9f4b-ss7tw","timestamp":"20250322065943","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"DeepSeek","url":"https:\/\/en.wikipedia.org\/wiki\/DeepSeek","sameAs":"http:\/\/www.wikidata.org\/entity\/Q131577453","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q131577453","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2024-11-26T14:36:48Z","dateModified":"2025-03-21T02:04:15Z","headline":"Chinese artificial intelligence company"}</script> </body> </html>