<!-- CINXE.COM -->
<!-- Data Platform - Wikitech -->
<!DOCTYPE html> <html class="client-nojs mf-expand-sections-clientpref-0 mf-font-size-clientpref-small mw-mf-amc-clientpref-0" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Data Platform - Wikitech</title> <script>(function(){var className="client-js mf-expand-sections-clientpref-0 mf-font-size-clientpref-small mw-mf-amc-clientpref-0";var cookie=document.cookie.match(/(?:^|; )labswikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"f4136ed3-5bbc-4333-832d-d6b1e1fc9fcd","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Data_Platform","wgTitle":"Data Platform","wgCurRevisionId":2224080,"wgRevisionId":2224080,"wgArticleId":454545,"wgIsArticle":true,"wgIsRedirect": 
false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Data_Platform","wgRelevantArticleId":454545,"wgIsProbablyEditable":false,"wgRelevantPageIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikitech","wgCiteReferencePreviewsActive":false,"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFMode":"stable","wgMFAmc":false,"wgMFAmcOutreachActive":false,"wgMFAmcOutreachUserEligible":false,"wgMFLazyLoadImages":true,"wgMFEditNoticesFeatureConflict":false,"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgMFIsSupportedEditRequest":true,"wgMFScriptPath":"","wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":6000,"wgCentralAuthMobileDomain":false, "wgEditSubmitButtonLabelPublish":true,"wgDiscussionToolsFeaturesEnabled":{"replytool":true,"newtopictool":true,"sourcemodetoolbar":true,"topicsubscription":false,"autotopicsub":false,"visualenhancements":false,"visualenhancements_reply":false,"visualenhancements_pageframe":false},"wgDiscussionToolsFallbackEditMode":"visual","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"wgMinervaPermissions":{"watchable":true,"watch":false},"wgMinervaFeatures":{"beta":false,"donate":true,"mobileOptionsLink":true,"categories":false,"pageIssues":true,"talkAtTop":false,"historyInPageActions":false,"overflowSubmenu":false,"tabsOnSpecials":true,"personalMenu":false,"mainMenuExpanded":false,"echo":true,"nightMode":false},"wgMinervaDownloadNamespaces":[0],"wgSiteNoticeId":"2.0"};RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready", 
"user.options":"loading","mediawiki.page.gallery.styles":"ready","ext.discussionTools.init.styles":"ready","oojs-ui-core.styles":"ready","oojs-ui.styles.indicators":"ready","mediawiki.widgets.styles":"ready","oojs-ui-core.icons":"ready","skins.minerva.styles":"ready","skins.minerva.content.styles.images":"ready","mediawiki.hlist":"ready","skins.minerva.codex.styles":"ready","skins.minerva.icons":"ready","jquery.tablesorter.styles":"ready","mediawiki.ui.button":"ready","ext.wikimediamessages.styles":"ready","mobile.init.styles":"ready","ext.dismissableSiteNotice.styles":"ready"};RLPAGEMODULES=["site","mediawiki.page.ready","jquery.tablesorter","skins.minerva.scripts","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.site","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mobile.init","ext.echo.centralauth","ext.discussionTools.init","ext.eventLogging","ext.wikimediaEvents","ext.checkUser.clientHints","ext.dismissableSiteNotice"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.discussionTools.init.styles%7Cext.dismissableSiteNotice.styles%7Cext.wikimediamessages.styles%7Cjquery.tablesorter.styles%7Cmediawiki.hlist%7Cmediawiki.page.gallery.styles%7Cmediawiki.ui.button%7Cmediawiki.widgets.styles%7Cmobile.init.styles%7Coojs-ui-core.icons%2Cstyles%7Coojs-ui.styles.indicators%7Cskins.minerva.codex.styles%7Cskins.minerva.content.styles.images%7Cskins.minerva.icons%2Cstyles&only=styles&skin=minerva"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=minerva"></script> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" 
content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="theme-color" content="#eaecf0"> <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0"> <meta property="og:title" content="Data Platform - Wikitech"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="manifest" href="/w/api.php?action=webapp-manifest"> <link rel="icon" href="/static/favicon/wikitech.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikitech (en)"> <link rel="EditURI" type="application/rsd+xml" href="//wikitech.wikimedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://wikitech.wikimedia.org/wiki/Data_Platform"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="ext-discussiontools-replytool-enabled ext-discussiontools-newtopictool-enabled ext-discussiontools-sourcemodetoolbar-enabled mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject page-Data_Platform rootpage-Data_Platform stable issues-group-B skin-minerva action-view skin--responsive mw-mf-amc-disabled mw-mf"><div id="mw-mf-viewport"> <div id="mw-mf-page-center"> <a class="mw-mf-page-center__mask" href="#"></a> <header class="header-container header-chrome"> <div class="minerva-header"> <nav class="navigation-drawer toggle-list view-border-box"> <input type="checkbox" id="main-menu-input" class="toggle-list__checkbox" role="button" aria-haspopup="true" aria-expanded="false" aria-labelledby="mw-mf-main-menu-button"> <label role="button" for="main-menu-input" id="mw-mf-main-menu-button" aria-hidden="true" data-event-name="ui.mainmenu" class="cdx-button cdx-button--size-large cdx-button--fake-button cdx-button--fake-button--enabled 
cdx-button--icon-only cdx-button--weight-quiet toggle-list__toggle"> <span class="minerva-icon minerva-icon--menu"></span> <span></span> </label> <div id="mw-mf-page-left" class="menu view-border-box"> <ul id="p-navigation" class="toggle-list__list"> <li class="toggle-list-item "> <a class="toggle-list-item__anchor menu__item--home" href="/wiki/Main_Page" data-mw="interface"> <span class="minerva-icon minerva-icon--home"></span> <span class="toggle-list-item__label">Home</span> </a> </li> <li class="toggle-list-item "> <a class="toggle-list-item__anchor menu__item--random" href="/wiki/Special:Random" data-mw="interface"> <span class="minerva-icon minerva-icon--die"></span> <span class="toggle-list-item__label">Random</span> </a> </li> <li class="toggle-list-item skin-minerva-list-item-jsonly"> <a class="toggle-list-item__anchor menu__item--nearby" href="/wiki/Special:Nearby" data-event-name="menu.nearby" data-mw="interface"> <span class="minerva-icon minerva-icon--mapPin"></span> <span class="toggle-list-item__label">Nearby</span> </a> </li> </ul> <ul id="p-personal" class="toggle-list__list"> <li class="toggle-list-item "> <a class="toggle-list-item__anchor menu__item--login" href="/w/index.php?title=Special:UserLogin&returnto=Data+Platform&returntoquery=mobileaction%3Dtoggle_view_mobile" data-event-name="menu.login" data-mw="interface"> <span class="minerva-icon minerva-icon--logIn"></span> <span class="toggle-list-item__label">Log in</span> </a> </li> </ul> <ul id="pt-preferences" class="toggle-list__list"> <li class="toggle-list-item skin-minerva-list-item-jsonly"> <a class="toggle-list-item__anchor menu__item--settings" href="/w/index.php?title=Special:MobileOptions&returnto=Data+Platform" data-event-name="menu.settings" data-mw="interface"> <span class="minerva-icon minerva-icon--settings"></span> <span class="toggle-list-item__label">Settings</span> </a> </li> </ul> <ul id="p-donation" class="toggle-list__list"> <li class="toggle-list-item "> <a 
class="toggle-list-item__anchor menu__item--donate" href="https://donate.wikimedia.org/?utm_source=donate&utm_medium=sidebar&utm_campaign=spontaneous&uselang=en&utm_key=minerva" data-event-name="menu.donate" data-mw="interface"> <span class="minerva-icon minerva-icon--heart"></span> <span class="toggle-list-item__label">Donate</span> </a> </li> </ul> <ul class="hlist"> <li class="toggle-list-item "> <a class="toggle-list-item__anchor menu__item--about" href="/wiki/Main_Page" data-mw="interface"> <span class="toggle-list-item__label">About Wikitech</span> </a> </li> <li class="toggle-list-item "> <a class="toggle-list-item__anchor menu__item--disclaimers" href="https://foundation.wikimedia.org/wiki/General_disclaimer" data-mw="interface"> <span class="toggle-list-item__label">Disclaimers</span> </a> </li> </ul> </div> <label class="main-menu-mask" for="main-menu-input"></label> </nav> <div class="branding-box"> <a href="/wiki/Main_Page"> <span><img src="/static/images/mobile/copyright/wikitech-wordmark.svg" alt="Wikitech" width="140" height="27" style="width: 8.75em; height: 1.6875em;"/> </span> </a> </div> <form action="/w/index.php" method="get" class="minerva-search-form"> <div class="search-box"> <input type="hidden" name="title" value="Special:Search"/> <input class="search skin-minerva-search-trigger" id="searchInput" type="search" name="search" placeholder="Search Wikitech" aria-label="Search Wikitech" autocapitalize="sentences" title="Search Wikitech [f]" accesskey="f"> <span class="search-box-icon-overlay"><span class="minerva-icon minerva-icon--search"></span> </span> </div> <button id="searchIcon" class="cdx-button cdx-button--size-large cdx-button--icon-only cdx-button--weight-quiet skin-minerva-search-trigger"> <span class="minerva-icon minerva-icon--search"></span> <span>Search</span> </button> </form> <nav class="minerva-user-navigation" aria-label="User navigation"> </nav> </div> </header> <main id="content" class="mw-body"> <div 
class="banner-container"> <div id="siteNotice"></div> </div> <div class="pre-content heading-holder"> <div class="page-heading"> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Data Platform</span></h1> <div class="tagline"></div> </div> <nav class="page-actions-menu"> <ul id="p-views" class="page-actions-menu__list"> <li id="language-selector" class="page-actions-menu__list-item"> <a role="button" href="" data-mw="interface" data-event-name="menu.languages" title="Language" class="cdx-button cdx-button--size-large cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--icon-only cdx-button--weight-quiet language-selector disabled"> <span class="minerva-icon minerva-icon--language"></span> <span>Language</span> </a> </li> <li id="page-actions-watch" class="page-actions-menu__list-item"> <a role="button" id="ca-watch" href="/w/index.php?title=Special:UserLogin&returnto=Data+Platform" data-event-name="menu.watch" class="cdx-button cdx-button--size-large cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--icon-only cdx-button--weight-quiet menu__item--page-actions-watch"> <span class="minerva-icon minerva-icon--star"></span> <span>Watch</span> </a> </li> <li id="page-actions-viewsource" class="page-actions-menu__list-item"> <a role="button" id="ca-edit" href="/w/index.php?title=Data_Platform&action=edit" data-event-name="menu.viewsource" data-mw="interface" class="cdx-button cdx-button--size-large cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--icon-only cdx-button--weight-quiet edit-page menu__item--page-actions-viewsource"> <span class="minerva-icon minerva-icon--editLock"></span> <span>View source</span> </a> </li> </ul> </nav> <!-- version 1.0.2 (change every time you update a partial) --> <div id="mw-content-subtitle"></div> </div> <div id="bodyContent" class="content"> <div id="mw-content-text" class="mw-body-content"><script>
/**
 * Mark a collapsible section as open.
 *
 * Appends the " open-block" class to the element whose id is
 * "mf-section-" + id, and to the element directly preceding it.
 * Does nothing when no element with that id exists.
 *
 * @param {number|string} id Suffix of the target element's "mf-section-" id.
 */
function mfTempOpenSection(id){
  var block=document.getElementById("mf-section-"+id);
  // An unknown id yields null; bail out instead of throwing a TypeError.
  if(!block){return;}
  block.className+=" open-block";
  // previousElementSibling (unlike previousSibling) skips text and comment
  // nodes, so whitespace between the two elements cannot divert the class
  // onto a text node. It is null when the section is the first child.
  var heading=block.previousElementSibling;
  if(heading){heading.className+=" open-block";}
}
</script><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><section class="mf-section-0" id="mf-section-0"><style data-mw-deduplicate="TemplateStyles:r2199624">.mw-parser-output .mw-tpl-rellink{font-style:italic;padding-bottom:0.4em;padding-left:0;margin-bottom:0.4em;color:#555;border-bottom:1px solid var(--border-color-base,#a2a9b1)}</style> <div class="rellink mw-tpl-rellink">Not to be confused with <a href="/wiki/Portal:Data_Services" title="Portal:Data Services">Data Services</a> within <a href="/wiki/Help:Cloud_Services_introduction" title="Help:Cloud Services introduction">Wikimedia Cloud Services</a>.</div> <p>Wikimedia's Data Platform is a collection of systems and services that enable data producers and consumers to discover, use, and collect data to derive insights, conduct research, and build new data products. The Data Platform is primarily maintained by the <a href="/wiki/Data_Platform_Engineering" title="Data Platform Engineering">Data Platform Engineering team</a>. To contact the team, use the <a href="https://www.mediawiki.org/wiki/Data_Platform_Engineering/Intake_Process" class="extiw" title="mw:Data Platform Engineering/Intake Process">intake process</a>. 
</p> </section><div class="mw-heading mw-heading2 ext-discussiontools-init-section section-heading" onclick="mfTempOpenSection(1)"><span class="indicator mf-icon mf-icon-expand mf-icon--small"></span><h2 id="Get_started" data-mw-thread-id="h-Get_started"><span data-mw-comment-start="" id="h-Get_started"></span>Get started<span data-mw-comment-end="h-Get_started"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Get_started","replies":[]}}--></div><section class="mf-section-1 collapsible-block" id="mf-section-1"> <p>The Data Platform provides access to <b>private data</b> and <b>internal WMF resources</b>, so you must have <a href="/wiki/Data_Platform/Data_access" title="Data Platform/Data access"> specialized data access</a> to use it. For public, open access Wikimedia data and tools, see <a href="https://meta.wikimedia.org/wiki/Research:Data" class="extiw" title="meta:Research:Data">meta:Research:Data</a>. </p> <style data-mw-deduplicate="TemplateStyles:r2126319">.mw-parser-output .tpl-contentgrid{margin:1em 0;display:grid}.mw-parser-output .tpl-contentgrid .mw-tpl-colorbox{margin:0}</style> <div class="tpl-contentgrid" style="grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); grid-gap: 2em; grid-auto-rows: minmax(100px, auto); width: auto;"><div class="mw-tpl-colorbox" style=""><style data-mw-deduplicate="TemplateStyles:r2199608">.mw-parser-output .mw-tpl-colorbox{box-sizing:border-box;margin:0.5em 0.5em 1em 0.5em;border-radius:2px;overflow:hidden;background:var(--background-color-base,#fff);color:var(--color-base,#202122);border:1px solid var(--border-color-base,#a2a9b1);box-shadow:0 2px 2px rgba(0,0,0,0.2)}.mw-parser-output .mw-tpl-colorbox-title{background:var(--background-color-interactive,#eaecf0);color:var(--color-emphasized,#000000);display:flex;gap:0.5rem;padding-top:0.4rem;padding-bottom:0.4rem}.mw-parser-output 
.mw-tpl-colorbox-title--linked:hover{background:var(--background-color-progressive-subtle,#eaf3ff)}.mw-parser-output .mw-tpl-colorbox-title>*:first-child{padding-left:1rem}.mw-parser-output .mw-tpl-colorbox-title>*:last-child{padding-right:1rem}.mw-parser-output .mw-tpl-colorbox-title>strong,.mw-parser-output .mw-tpl-colorbox-title>a{flex-grow:1}.mw-parser-output .mw-tpl-colorbox-title-icon{opacity:0.8}.mw-parser-output .mw-tpl-colorbox-title-corner{float:right;font-size:0.7em}.mw-parser-output .mw-tpl-colorbox-content{padding:0.5rem 1rem}.mw-parser-output .mw-tpl-colorbox-content::after{content:"";display:block;clear:both}</style><div class="mw-tpl-colorbox-title mw-tpl-colorbox-title--linked" style=""><a href="/wiki/Data_Platform/Discover_data" title="Data Platform/Discover data"><strong>Discover data</strong></a> </div><div class="mw-tpl-colorbox-content"> <p>Find datasets and documentation for WMF private data sources. </p> </div> </div> <div class="mw-tpl-colorbox" style=""><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r2199608"><div class="mw-tpl-colorbox-title mw-tpl-colorbox-title--linked" style=""><a href="/wiki/Data_Platform/Analyze_data" title="Data Platform/Analyze data"><strong>Access and query data</strong></a> </div><div class="mw-tpl-colorbox-content"> <p>Use SQL query engines, Jupyter notebooks, libraries, and compute resources to explore and analyze data. </p> </div> </div> <div class="mw-tpl-colorbox" style=""><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r2199608"><div class="mw-tpl-colorbox-title mw-tpl-colorbox-title--linked" style=""><a href="/wiki/Data_Platform/Transform_data" title="Data Platform/Transform data"><strong>Transform and publish data</strong></a> </div><div class="mw-tpl-colorbox-content"> <p>Create and share derivative datasets, reports, and dashboards based on existing Wikimedia data sources. 
</p> </div> </div> <div class="mw-tpl-colorbox" style=""><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r2199608"><div class="mw-tpl-colorbox-title mw-tpl-colorbox-title--linked" style=""><a href="/wiki/Metrics_Platform" title="Metrics Platform"><strong>Collect data</strong></a> </div><div class="mw-tpl-colorbox-content"> <p>Use the <a href="/wiki/Metrics_Platform" title="Metrics Platform">Metrics Platform</a> to configure instruments and collect analytics data. </p> <ul><li>Advanced users: use the <a href="/wiki/Event_Platform" title="Event Platform">Event Platform</a> to configure and deploy event streams.</li></ul> </div> </div></div> <p><span id="Data_Engineering"></span> </p> </section><div class="mw-heading mw-heading2 ext-discussiontools-init-section section-heading" onclick="mfTempOpenSection(2)"><span class="indicator mf-icon mf-icon-expand mf-icon--small"></span><h2 id="Data_platform_infrastructure" data-mw-thread-id="h-Data_platform_infrastructure"><span data-mw-comment-start="" id="h-Data_platform_infrastructure"></span>Data platform infrastructure<span data-mw-comment-end="h-Data_platform_infrastructure"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Data_platform_infrastructure","replies":["h-Data_pipelines-Data_platform_infrastructure","h-Search_data_and_services-Data_platform_infrastructure","h-Overview_of_data_platform_systems-Data_platform_infrastructure"]}}--></div><section class="mf-section-2 collapsible-block" id="mf-section-2"> <p>Data platform systems and infrastructure include the data lake, ingestion and processing pipelines, and production search and query services. 
</p> <div class="mw-heading mw-heading3"><h3 id="Data_pipelines" data-mw-thread-id="h-Data_pipelines-Data_platform_infrastructure"><span data-mw-comment-start="" id="h-Data_pipelines-Data_platform_infrastructure"></span>Data pipelines<span data-mw-comment-end="h-Data_pipelines-Data_platform_infrastructure"></span></h3></div> <p>Information about data pipelines is currently at: </p> <ul><li><a href="/wiki/Data_Platform/Systems/Cluster" title="Data Platform/Systems/Cluster">Data Platform/Systems/Cluster</a></li> <li><a href="/wiki/Data_Platform/Systems/Airflow" title="Data Platform/Systems/Airflow">Airflow</a></li> <li><a href="/wiki/Data_Platform/Systems/Hadoop_Event_Ingestion_Lifecycle" title="Data Platform/Systems/Hadoop Event Ingestion Lifecycle">Hadoop Event Ingestion</a></li> <li><a href="/wiki/Category:Data_pipelines" title="Category:Data pipelines">Category:Data_pipelines</a></li></ul> <div class="mw-heading mw-heading3"><h3 id="Search_data_and_services" data-mw-thread-id="h-Search_data_and_services-Data_platform_infrastructure"><span data-mw-comment-start="" id="h-Search_data_and_services-Data_platform_infrastructure"></span>Search data and services<span data-mw-comment-end="h-Search_data_and_services-Data_platform_infrastructure"></span></h3></div> <ul><li><a href="/wiki/Search/Technical_interactions" title="Search/Technical interactions"> Using search for new features </a></li> <li><a href="/wiki/Search_Platform/Documentation#Search" title="Search Platform/Documentation"> Search Platform </a></li> <li><a href="/wiki/Wikidata_Query_Service" title="Wikidata Query Service"> Wikidata Query Service (WDQS) </a></li></ul> <div class="mw-heading mw-heading3"><h3 id="Overview_of_data_platform_systems" data-mw-thread-id="h-Overview_of_data_platform_systems-Data_platform_infrastructure"><span data-mw-comment-start="" id="h-Overview_of_data_platform_systems-Data_platform_infrastructure"></span>Overview of data platform systems<span 
data-mw-comment-end="h-Overview_of_data_platform_systems-Data_platform_infrastructure"></span></h3></div> <ul class="gallery mw-gallery-traditional"> <li class="gallerybox" style="width: 155px"> <div class="thumb" style="width: 150px; height: 150px;"><span typeof="mw:File"><a href="/wiki/File:WMF_Data_Platform_Technical_Overview_2023_V1.jpg" class="mw-file-description" title="Data Platform Technical Overview 2023"><noscript><img alt="Data Platform Technical Overview 2023" src="//upload.wikimedia.org/wikipedia/commons/thumb/d/da/WMF_Data_Platform_Technical_Overview_2023_V1.jpg/120px-WMF_Data_Platform_Technical_Overview_2023_V1.jpg" decoding="async" width="120" height="80" class="mw-file-element" data-file-width="5386" data-file-height="3594"></noscript><span class="lazy-image-placeholder" style="width: 120px;height: 80px;" data-src="//upload.wikimedia.org/wikipedia/commons/thumb/d/da/WMF_Data_Platform_Technical_Overview_2023_V1.jpg/120px-WMF_Data_Platform_Technical_Overview_2023_V1.jpg" data-alt="Data Platform Technical Overview 2023" data-width="120" data-height="80" data-srcset="//upload.wikimedia.org/wikipedia/commons/thumb/d/da/WMF_Data_Platform_Technical_Overview_2023_V1.jpg/180px-WMF_Data_Platform_Technical_Overview_2023_V1.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/d/da/WMF_Data_Platform_Technical_Overview_2023_V1.jpg/240px-WMF_Data_Platform_Technical_Overview_2023_V1.jpg 2x" data-class="mw-file-element"> </span></a></span></div> <div class="gallerytext">Data Platform Technical Overview 2023</div> </li> <li class="gallerybox" style="width: 155px"> <div class="thumb" style="width: 150px; height: 150px;"><span typeof="mw:File"><a href="/wiki/File:WMF_Analytics_Data_Platform_2021_v1.png" class="mw-file-description" title="Analytics Data Platform 2021"><noscript><img alt="Analytics Data Platform 2021" src="//upload.wikimedia.org/wikipedia/labs/thumb/5/5f/WMF_Analytics_Data_Platform_2021_v1.png/120px-WMF_Analytics_Data_Platform_2021_v1.png" 
decoding="async" width="120" height="72" class="mw-file-element" data-file-width="596" data-file-height="360"></noscript><span class="lazy-image-placeholder" style="width: 120px;height: 72px;" data-src="//upload.wikimedia.org/wikipedia/labs/thumb/5/5f/WMF_Analytics_Data_Platform_2021_v1.png/120px-WMF_Analytics_Data_Platform_2021_v1.png" data-alt="Analytics Data Platform 2021" data-width="120" data-height="72" data-srcset="//upload.wikimedia.org/wikipedia/labs/thumb/5/5f/WMF_Analytics_Data_Platform_2021_v1.png/180px-WMF_Analytics_Data_Platform_2021_v1.png 1.5x, //upload.wikimedia.org/wikipedia/labs/thumb/5/5f/WMF_Analytics_Data_Platform_2021_v1.png/240px-WMF_Analytics_Data_Platform_2021_v1.png 2x" data-class="mw-file-element"> </span></a></span></div> <div class="gallerytext">Analytics Data Platform 2021</div> </li> </ul> <p>The following list highlights some major Data Platform systems. For more details and a full list of Data Platform system documentation pages on this wiki, see <a href="/wiki/Data_Platform/Systems" title="Data Platform/Systems">Data_Platform/Systems</a>. 
</p> <table class="wikitable sortable"> <tbody><tr> <th>System name and link </th> <th>Type </th> <th><a href="/wiki/Data_Platform/Data_access" title="Data Platform/Data access">Accessibility</a> </th></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Airflow" title="Data Platform/Systems/Airflow">Airflow</a> </td> <td>Workflow Job Scheduler </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Archiva" class="mw-redirect" title="Archiva">Archiva</a> </td> <td>Repository for Java archives </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/AQS" title="Data Platform/Systems/AQS">AQS - <u>A</u>nalytics <u>Q</u>uery <u>S</u>ervice</a> </td> <td>REST API for analytics data </td> <td>Public </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Ceph" title="Data Platform/Systems/Ceph">Ceph</a> </td> <td>Software defined storage, serving block and object storage </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Clients" title="Data Platform/Systems/Clients">Clients (stat100X)</a> </td> <td>Analytics client nodes to access Hadoop and various services </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Cluster" title="Data Platform/Systems/Cluster">Cluster (Hadoop, Gobblin, Hive, Spark...)</a> </td> <td>Hadoop </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/DataHub" title="Data Platform/Systems/DataHub">Datahub</a> </td> <td>Data Catalog </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Dashiki" title="Data Platform/Systems/Dashiki">Dashiki</a> </td> <td>Framework for building dashboards </td> <td>Public </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Druid" title="Data Platform/Systems/Druid">Druid</a> </td> <td>Data storage engine optimized for exploratory analytics </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/EventLogging" title="Data Platform/Systems/EventLogging">EventLogging</a> </td> <td>Ad-hoc streaming 
pipeline </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/EventStreams" class="mw-redirect" title="Data Platform/Systems/EventStreams">EventStreams</a> </td> <td>MediaWiki event streams </td> <td>Public </td></tr> <tr> <td><a href="/wiki/Kafka" title="Kafka">Kafka</a> </td> <td>Data transport and streaming system </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/MariaDB" title="Data Platform/Systems/MariaDB">MariaDB</a> </td> <td>Data storage for MediaWiki replicas and EventLogging </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Piwik" class="mw-redirect" title="Data Platform/Systems/Piwik">Matomo</a> (formerly known as Piwik) </td> <td>Small-scale web analytics platform </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Presto" title="Data Platform/Systems/Presto">Presto</a> </td> <td>Big data high-performance SQL query engine </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Reportupdater" title="Data Platform/Systems/Reportupdater">ReportUpdater</a> </td> <td>Job Scheduler </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Superset" title="Data Platform/Systems/Superset">Superset</a> </td> <td>Web interface for data visualization and exploration </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Jupyter" title="Data Platform/Systems/Jupyter">Jupyter</a> </td> <td>Hosted notebooks for data analysis </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Turnilo-Pivot" class="mw-redirect" title="Data Platform/Systems/Turnilo-Pivot">Turnilo</a> </td> <td>Web interface for exploring data stored in Druid </td> <td>Private </td></tr> <tr> <td><a href="/wiki/Data_Platform/Systems/Wikistats_2" title="Data Platform/Systems/Wikistats 2">Wikistats</a> (1 and 2) </td> <td>Community Dashboard with high-level metrics </td> <td>Public </td></tr> <tr> <td><a href="/wiki/Wmfdata" 
class="mw-redirect" title="Wmfdata">Wmfdata-Python</a> </td> <td>Python package for streamlined data access on the <a href="/wiki/Data_Platform/Systems/Clients" title="Data Platform/Systems/Clients">analytics clients</a> </td> <td>Private </td></tr></tbody></table> <div style="text-align:center;"> <p><span class="plainlinks"><a class="external text" href="https://wikitech.wikimedia.org/wiki/Data_Platform/Systems"><span class="mw-ui-button mw-ui-progressive" role="button">Full list of Data Platform systems</span></a></span> </p> </div> </section><div class="mw-heading mw-heading2 ext-discussiontools-init-section section-heading" onclick="mfTempOpenSection(3)"><span class="indicator mf-icon mf-icon-expand mf-icon--small"></span><h2 id="Data_platform_operations" data-mw-thread-id="h-Data_platform_operations"><span data-mw-comment-start="" id="h-Data_platform_operations"></span>Data platform operations<span data-mw-comment-end="h-Data_platform_operations"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Data_platform_operations","replies":[]}}--></div><section class="mf-section-3 collapsible-block" id="mf-section-3"> <p>Find <a href="/wiki/Data_Platform_Engineering/Ops_week" title="Data Platform Engineering/Ops week">ops week</a> and other process documentation at <a href="/wiki/Data_Platform_Engineering" title="Data Platform Engineering">Data Platform Engineering on Wikitech</a> and the <a href="https://www.mediawiki.org/wiki/Data_Platform_Engineeringteam/" class="extiw" title="mw:Data Platform Engineeringteam/">project pages on MediaWiki.org</a>. 
</p><p>The list of scheduled manual maintenance tasks is documented at <a href="/wiki/Data_Platform/Systems/Manual_maintenance" title="Data Platform/Systems/Manual maintenance">Data Platform/Systems/Manual maintenance</a>. </p> <!-- NewPP limit report Parsed by mw‐web.eqiad.main‐5dc468848‐kjt4l Cached time: 20241123114631 Cache expiry: 2592000 Reduced expiry: false Complications: [no‐toc] DiscussionTools time usage: 0.011 seconds CPU time usage: 0.071 seconds Real time usage: 0.105 seconds Preprocessor visited node count: 286/1000000 Post‐expand include size: 4501/2097152 bytes Template argument size: 2571/2097152 bytes Highest expansion depth: 7/100 Expensive parser function count: 0/500 Unstrip recursion depth: 0/20 Unstrip post‐expand size: 7444/5000000 bytes Lua time usage: 0.004/10.000 seconds Lua memory usage: 513906/52428800 bytes --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 71.989 1 -total 29.45% 21.204 1 Template:ContentGrid 27.12% 19.520 1 Template:Clickable_button_2 20.01% 14.404 4 Template:Colored_box 17.81% 12.819 1 Template:Hatnote 4.51% 3.247 1 Template:Anchor --> <!-- Saved in parser cache with key labswiki:pcache:idhash:454545-0!canonical and timestamp 20241123114631 and revision id 2224080. 
Rendering was triggered because: page-view -->
<!-- The next two closing tags end elements that were opened earlier in the document. -->
</section></div>
<!-- MobileFormatter took 0.007 seconds --><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> -->
<!-- Script-less fallback: a hidden 1x1 image requested from Special:CentralAutoLogin on login.wikimedia.org. -->
<noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript>
<!-- Permalink to this exact revision. Ampersands are escaped so the URL is unambiguous in both the attribute and the visible text. -->
<div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://wikitech.wikimedia.org/w/index.php?title=Data_Platform&amp;oldid=2224080">https://wikitech.wikimedia.org/w/index.php?title=Data_Platform&amp;oldid=2224080</a>"</div>
<!-- Closes two more containers opened earlier in the document. -->
</div>
</div>
<div class="post-content" id="page-secondary-actions"> </div>
</main>

<!-- Page footer: last-modified bar, language list, wordmark, licence line and site links. -->
<footer class="mw-footer minerva-footer" role="contentinfo">
  <!-- The whole bar is a single link to the page history. -->
  <a class="last-modified-bar" href="/w/index.php?title=Data_Platform&amp;action=history">
    <div class="post-content last-modified-bar__content">
      <span class="minerva-icon minerva-icon-size-medium minerva-icon--modified-history"></span>
      <span class="last-modified-bar__text modified-enhancement" data-user-name="BTullis (WMF)" data-user-gender="male" data-timestamp="1725701198">
        <span>Last edited on 7 September 2024, at 09:26</span>
      </span>
      <span class="minerva-icon minerva-icon-size-small minerva-icon--expand"></span>
    </div>
  </a>
  <div class="post-content footer-content">
    <div id="p-lang">
      <h4>Languages</h4>
      <section>
        <ul id="p-variants" class="minerva-languages"></ul>
        <p>This page is not available in other languages.</p>
      </section>
    </div>
    <div class="minerva-footer-logo">
      <!-- img is a void element, so it takes no trailing slash. -->
      <img src="/static/images/mobile/copyright/wikitech-wordmark.svg" alt="Wikitech" width="140" height="27" style="width: 8.75em; height: 1.6875em;">
    </div>
    <ul id="footer-info" class="footer-info hlist hlist-separated">
      <li id="footer-info-lastmod">This page was last edited on 7 September 2024, at 09:26.</li>
      <li id="footer-info-copyright">Content is available under <a class="external" rel="nofollow" href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a> unless otherwise noted.</li>
    </ul>
    <ul id="footer-places" class="footer-places hlist hlist-separated">
      <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li>
      <li id="footer-places-about"><a href="/wiki/Main_Page">About Wikitech</a></li>
      <li id="footer-places-disclaimers"><a href="https://foundation.wikimedia.org/wiki/General_disclaimer">Disclaimers</a></li>
      <li id="footer-places-wm-codeofconduct"><a href="https://www.mediawiki.org/wiki/Special:MyLanguage/Code_of_Conduct">Code of Conduct</a></li>
      <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li>
      <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/wikitech.wikimedia.org">Statistics</a></li>
      <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li>
      <li id="footer-places-terms-use"><a href="https://foundation.m.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use">Terms of Use</a></li>
      <!-- Explicit https: scheme instead of a protocol-relative URL, matching the other absolute links on this page. -->
      <li id="footer-places-desktop-toggle"><a id="mw-mf-display-toggle" href="https://wikitech.wikimedia.org/w/index.php?title=Data_Platform&amp;mobileaction=toggle_view_desktop" data-event-name="switch_to_desktop">Desktop</a></li>
    </ul>
  </div>
</footer>
<!-- Closes two wrappers opened earlier in the document. -->
</div>
</div>
<div class="mw-notification-area" data-mw="interface"></div>
<!-- v:8.3.1 -->

<script>
// Pushes a callback onto the window.RLQ array (created here when absent).
// The callback logs a deprecation warning for "mediawiki.ui.button" and then
// registers per-request page data through mw.config.set.
// NOTE(review): presumably the ResourceLoader startup code drains RLQ; the
// consumer is not part of this excerpt, so confirm against it.
(RLQ=window.RLQ||[]).push(function(){
// The message must stay on one physical line: an unescaped line break inside
// a quoted string literal is a syntax error and would stop this whole script
// from running, including the mw.config.set call below.
mw.log.warn("This page is using the deprecated ResourceLoader module \"mediawiki.ui.button\".\n[1.41] Please use Codex. See migration guidelines: https://www.mediawiki.org/wiki/Codex/Migrating_from_MediaWiki_UI");
mw.config.set({
"wgHostname":"mw-web.codfw.main-f69cdc8f6-gqt92",
"wgBackendResponseTime":120,
"wgDiscussionToolsPageThreads":[{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Get_started","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Data_platform_infrastructure","replies":[{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Data_pipelines-Data_platform_infrastructure","replies":[]},{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Search_data_and_services-Data_platform_infrastructure","replies":[]},{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Overview_of_data_platform_systems-Data_platform_infrastructure","replies":[]}]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Data_platform_operations","replies":[]}],
"wgPageParseReport":{"discussiontools":{"limitreport-timeusage":"0.011"},"limitreport":{"cputime":"0.071","walltime":"0.105","ppvisitednodes":{"value":286,"limit":1000000},"postexpandincludesize":{"value":4501,"limit":2097152},"templateargumentsize":{"value":2571,"limit":2097152},"expansiondepth":{"value":7,"limit":100},"expensivefunctioncount":{"value":0,"limit":500},"unstrip-depth":{"value":0,"limit":20},"unstrip-size":{"value":7444,"limit":5000000},"timingprofile":["100.00% 71.989 1 -total"," 29.45% 21.204 1 Template:ContentGrid"," 27.12% 19.520 1 Template:Clickable_button_2"," 20.01% 14.404 4 Template:Colored_box"," 17.81% 12.819 1 Template:Hatnote"," 4.51% 3.247 1 Template:Anchor"]},"scribunto":{"limitreport-timeusage":{"value":"0.004","limit":"10.000"},"limitreport-memusage":{"value":513906,"limit":52428800}},"cachereport":{"origin":"mw-web.eqiad.main-5dc468848-kjt4l","timestamp":"20241123114631","ttl":2592000,"transientcontent":false}}
});
});
</script>

<script>
// Pushes a callback onto the window.NORLQ array (created here when absent).
// For every <noscript> whose next sibling carries the class
// "lazy-image-placeholder", the callback swaps that sibling for a real <img>
// built from the sibling's data-src, data-width, data-height and data-alt.
// NOTE(review): presumably NORLQ is drained only where the main script loader
// does not run, so the code is kept to ES3-level syntax; confirm with the
// startup module.
(window.NORLQ=window.NORLQ||[]).push(function(){
  var noscripts, i, j, placeholder, img, value,
    copied = ['src', 'width', 'height', 'alt'];

  noscripts = document.getElementsByTagName('noscript');
  for (i = 0; i < noscripts.length; i++) {
    placeholder = noscripts[i].nextSibling;
    // Text nodes have no className, and SVG elements expose a non-string
    // className without indexOf, so require a real string before searching.
    if (placeholder && typeof placeholder.className === 'string' &&
        placeholder.className.indexOf('lazy-image-placeholder') > -1) {
      img = document.createElement('img');
      for (j = 0; j < copied.length; j++) {
        value = placeholder.getAttribute('data-' + copied[j]);
        // getAttribute yields null for a missing attribute; handing that to
        // setAttribute would store the literal text "null", so skip it.
        if (value !== null) {
          img.setAttribute(copied[j], value);
        }
      }
      placeholder.parentNode.replaceChild(img, placeholder);
    }
  }
});
</script>
</body>
</html>