CINXE.COM

Data Platform/Systems/Clients - Wikitech

<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-disabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Data Platform/Systems/Clients - Wikitech</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-disabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )labswikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat": 
"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"034fcab7-426f-4555-b4df-3642ea4439bb","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Data_Platform/Systems/Clients","wgTitle":"Data Platform/Systems/Clients","wgCurRevisionId":2241496,"wgRevisionId":2241496,"wgArticleId":444523,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Data platform","Data platform systems"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Data_Platform/Systems/Clients","wgRelevantArticleId":444523,"wgIsProbablyEditable":false,"wgRelevantPageIsProbablyEditable":false,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgRedirectedFrom":"Analytics/Systems/Clients","wgNoticeProject":"wikitech","wgCiteReferencePreviewsActive":true, "wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":10000,"wgInternalRedirectTargetUrl":"/wiki/Data_Platform/Systems/Clients","wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgDiscussionToolsFeaturesEnabled":{"replytool":true,"newtopictool":true,"sourcemodetoolbar":true,"topicsubscription":false,"autotopicsub":false,"visualenhancements":false,"visualenhancements_reply":false,"visualenhancements_pageframe":false},"wgDiscussionToolsFallbackEditMode":"visual","wgULSPosition":"personal","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model", 
"platform","platformVersion"],"wgSiteNoticeId":"2.0"};RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.inputBox.styles":"ready","ext.pygments":"ready","ext.discussionTools.init.styles":"ready","oojs-ui-core.styles":"ready","oojs-ui.styles.indicators":"ready","mediawiki.widgets.styles":"ready","oojs-ui-core.icons":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.pt":"ready","ext.dismissableSiteNotice.styles":"ready"};RLPAGEMODULES=["mediawiki.action.view.redirect","ext.pygments.view","site","mediawiki.page.ready","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.site","ext.urlShortener.toolbar","ext.centralauth.centralautologin", "ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.discussionTools.init","ext.eventLogging","ext.wikimediaEvents","ext.uls.interface","ext.checkUser.clientHints","ext.dismissableSiteNotice"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=ext.discussionTools.init.styles%7Cext.dismissableSiteNotice.styles%7Cext.inputBox.styles%7Cext.pygments%7Cext.uls.pt%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediamessages.styles%7Cmediawiki.widgets.styles%7Coojs-ui-core.icons%2Cstyles%7Coojs-ui.styles.indicators%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles&amp;only=styles&amp;skin=vector-2022"> <script async="" 
src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Data Platform/Systems/Clients - Wikitech"> <meta property="og:type" content="website"> <link rel="icon" href="/static/favicon/wikitech.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikitech (en)"> <link rel="EditURI" type="application/rsd+xml" href="//wikitech.wikimedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://wikitech.wikimedia.org/wiki/Data_Platform/Systems/Clients"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/"> <link rel="alternate" type="application/atom+xml" title="Wikitech Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="ext-discussiontools-replytool-enabled ext-discussiontools-newtopictool-enabled ext-discussiontools-sourcemodetoolbar-enabled skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject page-Data_Platform_Systems_Clients rootpage-Data_Platform skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div 
id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main 
page</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes in the wiki [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-Server-admin-log:-Prod" class="mw-list-item"><a href="/wiki/Server_Admin_Log"><span>Server admin log: Prod</span></a></li><li id="n-Admin-log:-RelEng" class="mw-list-item"><a href="/wiki/Release_Engineering/SAL"><span>Admin log: RelEng</span></a></li><li id="n-Incident-status" class="mw-list-item"><a href="/wiki/Incident_status"><span>Incident status</span></a></li><li id="n-Deployments" class="mw-list-item"><a href="/wiki/Deployments"><span>Deployments</span></a></li><li id="n-SRE-Team-Help" class="mw-list-item"><a href="/wiki/SRE/SRE_Team_requests"><span>SRE Team Help</span></a></li> </ul> </div> </div> <div id="p-Cloud_VPS_&amp;_Toolforge" class="vector-menu mw-portlet mw-portlet-Cloud_VPS_Toolforge" > <div class="vector-menu-heading"> Cloud VPS &amp; Toolforge </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-Cloud-VPS-portal" class="mw-list-item"><a href="/wiki/Portal:Cloud_VPS"><span>Cloud VPS portal</span></a></li><li id="n-Toolforge-portal" class="mw-list-item"><a href="/wiki/Portal:Toolforge"><span>Toolforge portal</span></a></li><li id="n-Request-VPS-project" class="mw-list-item"><a href="https://phabricator.wikimedia.org/project/view/2875/"><span>Request VPS project</span></a></li><li id="n-Admin-log:-Cloud-VPS" class="mw-list-item"><a href="/wiki/Cloud_VPS_Server_Admin_Log"><span>Admin log: Cloud VPS</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikitech.svg" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikitech" src="/static/images/mobile/copyright/wikitech-wordmark.svg" style="width: 8.75em; height: 
1.6875em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikitech [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikitech" aria-label="Search Wikitech" autocapitalize="sentences" title="Search Wikitech [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-uls" class="mw-list-item active user-links-collapsible-item"><a data-mw="interface" href="#" class="uls-trigger cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet"><span class="vector-icon mw-ui-icon-wikimedia-language 
mw-ui-icon-wikimedia-wikimedia-language"></span> <span>English</span></a> </li> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page&#039;s font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=spontaneous&amp;uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" 
href="/w/index.php?title=Special:UserLogin&amp;returnto=Data+Platform%2FSystems%2FClients" title="You are encouraged to log in; however, it is not mandatory [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out user-links-collapsible-item" title="More options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=spontaneous&amp;uselang=en"><span>Donate</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Data+Platform%2FSystems%2FClients" title="You are encouraged to log in; however, it is not mandatory [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> 
<div class="vector-sitenotice-container"> <div id="siteNotice"><div id="mw-dismissablenotice-anonplace"></div><script>(function(){var node=document.getElementById("mw-dismissablenotice-anonplace");if(node){node.outerHTML="\u003Cdiv class=\"mw-dismissable-notice\"\u003E\u003Cdiv class=\"mw-dismissable-notice-close\"\u003E[\u003Ca tabindex=\"0\" role=\"button\"\u003Edismiss\u003C/a\u003E]\u003C/div\u003E\u003Cdiv class=\"mw-dismissable-notice-body\"\u003E\u003C!-- CentralNotice --\u003E\u003Cdiv id=\"localNotice\" data-nosnippet=\"\"\u003E\u003Cdiv class=\"sitenotice\" lang=\"en\" dir=\"ltr\"\u003E\u003Ctable style=\"width: 75%; background-color: var(--background-color-warning-subtle, #fdf2d5); border: var(--border-subtle, 1px solid #987027); color: var(--color-base, #202122); border-radius: 10px; padding: 5px; margin: 0 auto;\"\u003E\n\u003Ctbody\u003E\u003Ctr\u003E\n\u003Ctd style=\"width:40px; height:40px; text-align:center; vertical-align:middle; padding: 2px;\"\u003E\u003Cspan typeof=\"mw:File\"\u003E\u003Ca href=\"/wiki/File:OOjs_UI_icon_alert-warning.svg\" class=\"mw-file-description\"\u003E\u003Cimg src=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/3b/OOjs_UI_icon_alert-warning.svg/30px-OOjs_UI_icon_alert-warning.svg.png\" decoding=\"async\" width=\"30\" height=\"30\" class=\"mw-file-element\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/3b/OOjs_UI_icon_alert-warning.svg/45px-OOjs_UI_icon_alert-warning.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/3b/OOjs_UI_icon_alert-warning.svg/60px-OOjs_UI_icon_alert-warning.svg.png 2x\" data-file-width=\"20\" data-file-height=\"20\" /\u003E\u003C/a\u003E\u003C/span\u003E\n\u003C/td\u003E\n\u003Ctd style=\"text-align:center; vertical-align: middle; padding: 4px; max-height: 60px;\"\u003E\u003Cb\u003EWe are migrating Wikitech to \u003Ca href=\"/wiki/Wikitech/SUL-migration\" title=\"Wikitech/SUL-migration\"\u003ESUL\u003C/a\u003E!\u003C/b\u003E\n\u003Cp\u003E\u003Cb\u003EAction may be 
required for your \u003Ca href=\"/wiki/Wikitech/SUL-migration#What_You_Should_Do\" title=\"Wikitech/SUL-migration\"\u003E account\u003C/a\u003E!\u003C/b\u003E\n\u003C/p\u003E\u003Cp\u003E\u003Cb\u003ETrouble logging in? Please visit \u003Ca href=\"https://phabricator.wikimedia.org/T376267\" class=\"extiw\" title=\"phab:T376267\"\u003ET376267\u003C/a\u003E\u003C/b\u003E\n\u003C/p\u003E\n\u003C/td\u003E\u003C/tr\u003E\u003C/tbody\u003E\u003C/table\u003E\u003C/div\u003E\u003C/div\u003E\u003C/div\u003E\u003C/div\u003E";}}());</script></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">Beginning</div> </a> </li> <li id="toc-Jupyter" class="vector-toc-list-item vector-toc-level-1 
vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Jupyter"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Jupyter</span> </div> </a> <ul id="toc-Jupyter-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Conda" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Conda"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Conda</span> </div> </a> <ul id="toc-Conda-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Querying_data" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Querying_data"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Querying data</span> </div> </a> <button aria-controls="toc-Querying_data-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Querying data subsection</span> </button> <ul id="toc-Querying_data-sublist" class="vector-toc-list"> <li id="toc-Python" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Python"> <div class="vector-toc-text"> <span class="vector-toc-numb">3.1</span> <span>Python</span> </div> </a> <ul id="toc-Python-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-R" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#R"> <div class="vector-toc-text"> <span class="vector-toc-numb">3.2</span> <span>R</span> </div> </a> <ul id="toc-R-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Internet_access" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Internet_access"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Internet access</span> </div> </a> <ul id="toc-Internet_access-sublist" 
class="vector-toc-list"> </ul> </li> <li id="toc-Resource_management" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Resource_management"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Resource management</span> </div> </a> <ul id="toc-Resource_management-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Local_data_storage" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Local_data_storage"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>Local data storage</span> </div> </a> <button aria-controls="toc-Local_data_storage-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Local data storage subsection</span> </button> <ul id="toc-Local_data_storage-sublist" class="vector-toc-list"> <li id="toc-Checking_for_available_disk_space" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Checking_for_available_disk_space"> <div class="vector-toc-text"> <span class="vector-toc-numb">6.1</span> <span>Checking for available disk space</span> </div> </a> <ul id="toc-Checking_for_available_disk_space-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Checking_the_space_used_by_your_files" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Checking_the_space_used_by_your_files"> <div class="vector-toc-text"> <span class="vector-toc-numb">6.2</span> <span>Checking the space used by your files</span> </div> </a> <ul id="toc-Checking_the_space_used_by_your_files-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Web_publication" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Web_publication"> <div class="vector-toc-text"> 
<span class="vector-toc-numb">7</span> <span>Web publication</span> </div> </a> <ul id="toc-Web_publication-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-GPU_usage" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#GPU_usage"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>GPU usage</span> </div> </a> <ul id="toc-GPU_usage-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Rsync_between_clients" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Rsync_between_clients"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>Rsync between clients</span> </div> </a> <ul id="toc-Rsync_between_clients-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Common_workflows_(WIP)" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Common_workflows_(WIP)"> <div class="vector-toc-text"> <span class="vector-toc-numb">10</span> <span>Common workflows (WIP)</span> </div> </a> <button aria-controls="toc-Common_workflows_(WIP)-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Common workflows (WIP) subsection</span> </button> <ul id="toc-Common_workflows_(WIP)-sublist" class="vector-toc-list"> <li id="toc-Spark_jobs" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Spark_jobs"> <div class="vector-toc-text"> <span class="vector-toc-numb">10.1</span> <span>Spark jobs</span> </div> </a> <ul id="toc-Spark_jobs-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" 
class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Data Platform/Systems/Clients</span></h1> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Data_Platform/Systems/Clients" title="View the content page [c]" accesskey="c"><span>Page</span></a></li><li id="ca-talk" class="new vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Talk:Data_Platform/Systems/Clients&amp;action=edit&amp;redlink=1" rel="discussion" class="new" title="Discussion about the content page (page does not exist) [t]" accesskey="t"><span>Discussion</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input 
type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Data_Platform/Systems/Clients"><span>Read</span></a></li><li id="ca-viewsource" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;action=edit" title="This page is protected.&#10;You can view its source [e]" accesskey="e"><span>View source</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > 
<label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Data_Platform/Systems/Clients"><span>Read</span></a></li><li id="ca-more-viewsource" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;action=edit"><span>View source</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;action=history"><span>View history</span></a></li> </ul> </div> 
</div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Data_Platform/Systems/Clients" title="A list of all wiki pages that link here [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Data_Platform/Systems/Clients" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;oldid=2241496" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Data_Platform%2FSystems%2FClients&amp;id=2241496&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&amp;url=https%3A%2F%2Fwikitech.wikimedia.org%2Fwiki%2FData_Platform%2FSystems%2FClients"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&amp;url=https%3A%2F%2Fwikitech.wikimedia.org%2Fwiki%2FData_Platform%2FSystems%2FClients"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu 
mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-create_a_book" class="mw-list-item"><a href="/w/index.php?title=Special:Book&amp;bookcmd=book_creator&amp;referer=Data+Platform%2FSystems%2FClients"><span>Create a book</span></a></li><li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Data_Platform%2FSystems%2FClients&amp;action=show-download-screen"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Data_Platform/Systems/Clients&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" 
data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikitech</div> </div> <div id="contentSub"><div id="mw-content-subtitle"><div class="subpages">&lt; <bdi dir="ltr"><a href="/wiki/Data_Platform" title="Data Platform">Data Platform</a></bdi> | <bdi dir="ltr"><a href="/wiki/Data_Platform/Systems" title="Data Platform/Systems">Systems</a></bdi></div><span class="mw-redirectedfrom">(Redirected from <a href="/w/index.php?title=Analytics/Systems/Clients&amp;redirect=no" class="mw-redirect" title="Analytics/Systems/Clients">Analytics/Systems/Clients</a>)</span></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><style data-mw-deduplicate="TemplateStyles:r2232773">.mw-parser-output .tpl-navsidebar{max-width:22em;background:var(--background-color-base,#fff);color:var(--color-base,#202122);border:1px solid var(--border-color-base,#a2a9b1);float:right;clear:right;margin:.5em 0 1em 1em}.mw-parser-output .tpl-navsidebar-floatright{float:right;clear:right;margin:.5em 0 1em 1em}.mw-parser-output .tpl-navsidebar-floatleft{float:left;clear:left;margin:.5em 1em 1em 0}.mw-parser-output .tpl-navsidebar-floatnone{float:none;clear:both;margin:.5em 0}.mw-parser-output .tpl-navsidebar-topimage{margin:0 0 16px 0}.mw-parser-output .tpl-navsidebar-title{margin:8px 16px;border-bottom:3px solid var(--border-color-muted,#eaecf0);font-size:20px;text-align:center}.mw-parser-output .tpl-navsidebar-image{margin:0 0 8px}.mw-parser-output .tpl-navsidebar-content{margin:0 0 16px 0;padding:0 8px}.mw-parser-output .tpl-navsidebar-heading{margin:8px 0;font-weight:bold}.mw-parser-output .tpl-navsidebar-foot{padding:0 
8px;margin:0;text-align:right;font-size:smaller}@media not (min-width:720px){.mw-parser-output .tpl-navsidebar{float:none;clear:both;margin:.5em 0;max-width:none}}</style><div role="navigation" class="navigation-not-searchable tpl-navsidebar" style=""><p class="tpl-navsidebar-title"><a href="/wiki/Data_Platform" title="Data Platform">Data Platform</a></p><div class="tpl-navsidebar-contents"><div class="tpl-navsidebar-content"> <div class="mw-inputbox-centered" style=""><form name="searchbox" class="searchbox mw-inputbox-form-inline" action="/wiki/Special:Search"><div class="cdx-text-input"><input class="mw-searchInput searchboxInput cdx-text-input__input" name="search" placeholder="Search Data Platform documentation" size="40" dir="ltr"/></div><input type="hidden" value="incategory:Data_platform" name="searchfilter"/> <input type="submit" name="fulltext" value="Search" class="cdx-button"/><input type="hidden" value="Search" name="fulltext"/></form></div> </div><div class="tpl-navsidebar-content"> <p class="tpl-navsidebar-heading"><a href="/wiki/Data_Platform/Discover_data" title="Data Platform/Discover data">Discover data</a></p><p class="mw-empty-elt"> </p><ul><li><a class="external text" href="https://datahub.wikimedia.org/">Explore datasets in DataHub</a></li> <li><a href="/wiki/Data_Platform/Data_Lake" title="Data Platform/Data Lake">Data Lake</a> <ul><li><a href="/wiki/Data_Platform/Data_Lake/Traffic" title="Data Platform/Data Lake/Traffic">Traffic data</a></li> <li><a href="/wiki/Data_Platform/Data_Lake/Edits" title="Data Platform/Data Lake/Edits">Edits data</a></li> <li><a href="/wiki/Data_Platform/Data_Lake/Content" title="Data Platform/Data Lake/Content">Content data</a></li> <li><a href="/wiki/Data_Platform/Data_Lake/Events" title="Data Platform/Data Lake/Events">Events data</a></li></ul></li> <li><a href="/wiki/Data_Platform/AQS" title="Data Platform/AQS">Analytics Query Service (AQS)</a></li></ul> </div><div class="tpl-navsidebar-content"> <p 
class="tpl-navsidebar-heading"><a href="/wiki/Data_Platform/Analyze_data" title="Data Platform/Analyze data">Access, query, and analyze data</a></p><p class="mw-empty-elt"> </p><ul><li><a href="/wiki/Data_Platform/Data_access" title="Data Platform/Data access">Get access to internal data</a></li> <li>Analytics tools <ul><li><a href="/wiki/Data_Platform/Systems/Jupyter" title="Data Platform/Systems/Jupyter">Jupyter notebooks</a></li> <li><a href="/wiki/Data_Platform/Systems/Superset" title="Data Platform/Systems/Superset">Superset</a></li> <li><a href="/wiki/Data_Platform/Systems/Spark" title="Data Platform/Systems/Spark">Spark</a></li> <li><a href="/wiki/Data_Platform/Systems/Presto" title="Data Platform/Systems/Presto">Presto</a></li></ul></li> <li><a rel="nofollow" class="external text" href="https://github.com/wikimedia/wmfdata-python/blob/main/docs/quickstart.ipynb">Quickstart notebook</a></li> <li><a href="/wiki/Data_Platform/Internal_API_requests" title="Data Platform/Internal API requests">Internal API requests</a></li></ul> </div><div class="tpl-navsidebar-content"> <p class="tpl-navsidebar-heading"><a href="/wiki/Data_Platform/Transform_data" title="Data Platform/Transform data">Transform and publish data</a></p><p class="mw-empty-elt"> </p><ul><li><a href="https://www.mediawiki.org/wiki/Data_Platform_Engineering/Intake_Process" class="extiw" title="mw:Data Platform Engineering/Intake Process">Get help or file a request</a></li> <li><a href="/wiki/Data_Platform/Transform_data#Plan_data_lifecyle" title="Data Platform/Transform data">Plan data lifecycle</a></li> <li>Build tables and datasets <ul><li><a href="/wiki/Data_Platform/Dataset_creation" title="Data Platform/Dataset creation">Dataset creation process</a></li> <li><a href="/wiki/Data_Platform/Data_modeling_guidelines" title="Data Platform/Data modeling guidelines"> Data modeling guidelines</a></li> <li><a href="/wiki/Data_Platform/Systems/Airflow/Developer_guide" title="Data 
Platform/Systems/Airflow/Developer guide">Airflow developer guide</a></li> <li><a href="/wiki/Data_Platform/Systems/Hive" title="Data Platform/Systems/Hive">Hive</a></li> <li><a href="/wiki/Data_Platform/Systems/Iceberg" title="Data Platform/Systems/Iceberg">Iceberg</a></li> <li><a href="/wiki/Data_Platform/Systems/Druid" title="Data Platform/Systems/Druid">Druid</a></li></ul></li> <li>Share data and dashboards <ul><li><a href="https://foundation.wikimedia.org/wiki/Legal:Data_publication_guidelines" class="extiw" title="foundation:Legal:Data publication guidelines"> Data publication guidelines</a></li> <li><a href="/wiki/Data_Platform/Systems/Turnilo" title="Data Platform/Systems/Turnilo">Turnilo</a></li> <li><a href="/wiki/Data_Platform/Systems/Superset" title="Data Platform/Systems/Superset">Superset</a></li> <li><a href="/wiki/Data_Platform/Systems/analytics.wikimedia.org" title="Data Platform/Systems/analytics.wikimedia.org"> analytics.wikimedia.org</a></li> <li><a href="/wiki/Data_Platform/Web_publication" title="Data Platform/Web publication"> Web publication guide</a></li> <li><a href="/wiki/Data_Platform/Systems/Dashiki" title="Data Platform/Systems/Dashiki"> Dashiki</a></li></ul></li> <li>Manage published data <ul><li><a href="/wiki/Data_Incident_management" class="mw-redirect" title="Data Incident management"> Data Incident management</a></li> <li><a href="/wiki/Data_Platform/Data_Lake/Data_Issues" title="Data Platform/Data Lake/Data Issues"> Data Issue reporting</a></li> <li><a href="https://foundation.wikimedia.org/wiki/Legal:Data_retention_guidelines" class="extiw" title="foundation:Legal:Data retention guidelines">Data Retention Guidelines</a></li> <li><a href="/wiki/Data_Platform/Systems/Event_Data_retention" title="Data Platform/Systems/Event Data retention">Event data retention</a></li> <li><a href="/wiki/Data_Platform/Event_Sanitization" title="Data Platform/Event Sanitization">Event Sanitization</a></li> <li><a 
href="/wiki/Data_Platform/Dataset_archiving_and_deletion" title="Data Platform/Dataset archiving and deletion">Dataset archiving and deletion</a></li></ul></li></ul> </div><div class="tpl-navsidebar-content"> <p class="tpl-navsidebar-heading">Collect data</p><p class="mw-empty-elt"> </p><ul><li><a href="/wiki/Metrics_Platform" title="Metrics Platform">Metrics platform</a></li> <li><a href="/wiki/Event_Platform/Instrumentation_How_To" title="Event Platform/Instrumentation How To">Instrumentation tutorial</a></li> <li><a href="/wiki/Event_Platform" title="Event Platform">Event Platform</a></li></ul> <hr/> </div><div class="tpl-navsidebar-content"> <p class="tpl-navsidebar-heading">Data Platform infrastructure and operations</p><p class="mw-empty-elt"> </p><ul><li><a href="/wiki/Data_Platform/Systems" title="Data Platform/Systems">Systems overview</a></li> <li><a href="/wiki/Category:Data_pipelines" title="Category:Data pipelines"> Data pipelines</a></li> <li>Search <ul><li><a href="/wiki/Search/Technical_interactions" title="Search/Technical interactions"> Using search for new features </a></li> <li><a href="/wiki/Search_Platform/Documentation#Search" title="Search Platform/Documentation"> Search Platform </a></li> <li><a href="/wiki/Wikidata_Query_Service" title="Wikidata Query Service"> Wikidata Query Service (WDQS) </a></li></ul></li> <li>Operations and team processes <ul><li><a href="/wiki/Data_Platform_Engineering/Ops_week" title="Data Platform Engineering/Ops week">Ops week</a></li> <li><a href="/wiki/Data_Platform_Engineering" title="Data Platform Engineering">Team pages on Wikitech</a></li> <li><a href="https://www.mediawiki.org/wiki/Data_Platform_Engineering" class="extiw" title="mw:Data Platform Engineering">Team and project pages on MediaWiki.org</a></li></ul></li></ul> </div></div><p class="tpl-navsidebar-foot">[<span class="noprint plainlinks"><a class="external text" 
href="https://wikitech.wikimedia.org/w/index.php?title=Template:Navigation_Data_Platform&amp;action=edit"><span title="Edit this template">edit</span></a></span>]</p></div> <p>The production cluster has several servers which you can use to access the various private data sources and do general statistical computation. They are called the <b><a href="https://gerrit.wikimedia.org/g/operations/puppet/%2B/55aace1b91d341844759df15f7b01b340daab558/modules/profile/manifests/analytics/cluster/client.pp" class="extiw" title="git:operations/puppet/+/55aace1b91d341844759df15f7b01b340daab558/modules/profile/manifests/analytics/cluster/client.pp">analytics clients</a></b>, since they act as clients accessing data from various other databases (but they are also known informally as <b>stat hosts, stat machines,</b> or <b>stat clients</b>). To learn more about how to access these, refer to <a href="/wiki/Data_Platform/Data_access" title="Data Platform/Data access">Data_Platform/Data access</a>. </p><p>They can all provide <a href="/wiki/Data_Platform/Systems/Jupyter" title="Data Platform/Systems/Jupyter">hosted Jupyter notebooks</a>. 
</p> <table class="wikitable"> <tbody><tr> <th>Host</th> <th>OS</th> <th>CPU cores</th> <th>RAM</th> <th>Disk Space</th> <th>GPU </th> <th>Relative I/O performance </th></tr> <tr> <td><a href="/wiki/Stat1008" title="Stat1008">stat1008</a></td> <td>Debian Bullseye</td> <td>32</td> <td>512G</td> <td>7.2TB</td> <td>yes </td> <td>4th out of 4 </td></tr> <tr> <td><a href="/wiki/Stat1009" title="Stat1009">stat1009</a></td> <td>Debian Bullseye</td> <td>72</td> <td>188G</td> <td>17TB</td> <td>no </td> <td>2nd out of 4 </td></tr> <tr> <td><a href="/wiki/Stat1010" title="Stat1010">stat1010</a></td> <td>Debian Bullseye</td> <td>72</td> <td>512G</td> <td>6TB</td> <td>yes </td> <td>1st out of 4 </td></tr> <tr> <td><a href="/wiki/Stat1011" title="Stat1011">stat1011</a></td> <td>Debian Bullseye</td> <td>48</td> <td>128G</td> <td>6TB</td> <td>no </td> <td>3rd out of 4 </td></tr></tbody></table> <meta property="mw:PageProp/toc"/> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Jupyter" data-mw-thread-id="h-Jupyter"><span data-mw-comment-start="" id="h-Jupyter"></span>Jupyter<span data-mw-comment-end="h-Jupyter"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Jupyter","replies":[]}}--></div> <dl><dd><i>Main article: <a href="/wiki/Data_Platform/Systems/Jupyter" title="Data Platform/Systems/Jupyter">Data Platform/Systems/Jupyter</a></i></dd></dl> <p>Every client provides a hosted Jupyter environment for interactive notebooks and terminals. 
</p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Conda" data-mw-thread-id="h-Conda"><span data-mw-comment-start="" id="h-Conda"></span>Conda<span data-mw-comment-end="h-Conda"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Conda","replies":[]}}--></div> <dl><dd><i>Main article: <a href="/wiki/Data_Platform/Systems/Conda" title="Data Platform/Systems/Conda">Data Platform/Systems/Conda</a></i></dd></dl> <p>We use Conda on the analytics clients to help folks create isolated environments to work in and install whatever packages they need. It's best to do all your work inside a Conda environment. </p><p>If you use Jupyter, this is all handled automatically. If you're working through a standard terminal, make sure to follow the instructions on <a href="/wiki/Data_Engineering/Systems/Conda#Use_outside_Jupyter" class="mw-redirect" title="Data Engineering/Systems/Conda">the Conda page</a> to create and activate environments. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Querying_data" data-mw-thread-id="h-Querying_data"><span data-mw-comment-start="" id="h-Querying_data"></span>Querying data<span data-mw-comment-end="h-Querying_data"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Querying_data","replies":["h-Python-Querying_data","h-R-Querying_data"]}}--></div> <p>The easiest way to query data on one of the analytics clients is to use one of the Wmfdata packages in a Jupyter environment. 
</p> <div class="mw-heading mw-heading3"><h3 id="Python" data-mw-thread-id="h-Python-Querying_data"><span data-mw-comment-start="" id="h-Python-Querying_data"></span>Python<span data-mw-comment-end="h-Python-Querying_data"></span></h3></div> <p>For Python, there is <a href="https://gitlab.wikimedia.org/repos/data-engineering/wmfdata-python" class="extiw" title="gitlab:repos/data-engineering/wmfdata-python">Wmfdata-Python</a>. It can access data through <a href="/wiki/Data_Platform/Systems/MariaDB" title="Data Platform/Systems/MariaDB">MariaDB</a>, <a href="/wiki/Data_Platform/Systems/Cluster/Hive" class="mw-redirect" title="Data Platform/Systems/Cluster/Hive">Hive</a>, <a href="/wiki/Data_Platform/Systems/Presto" title="Data Platform/Systems/Presto">Presto</a>, and <a href="/wiki/Data_Platform/Systems/Cluster/Spark" class="mw-redirect" title="Data Platform/Systems/Cluster/Spark">Spark</a> and has a number of other useful functions, like creating Spark sessions. For details, see <a href="https://gitlab.wikimedia.org/repos/data-engineering/wmfdata-python" class="extiw" title="gitlab:repos/data-engineering/wmfdata-python">the repository</a> and particularly the <a href="https://gitlab.wikimedia.org/repos/data-engineering/wmfdata-python/-/blob/main/docs/quickstart.ipynb" class="extiw" title="gitlab:repos/data-engineering/wmfdata-python/-/blob/main/docs/quickstart.ipynb">quickstart notebook</a>. </p> <div class="mw-heading mw-heading3"><h3 id="R" data-mw-thread-id="h-R-Querying_data"><span data-mw-comment-start="" id="h-R-Querying_data"></span>R<span data-mw-comment-end="h-R-Querying_data"></span></h3></div> <p>For R, there is <a rel="nofollow" class="external text" href="https://github.com/wikimedia/wmfdata-r">wmfdata-r</a>. 
It can access data from <a href="/wiki/Data_Platform/Systems/MariaDB" title="Data Platform/Systems/MariaDB">MariaDB</a> and <a href="/wiki/Data_Platform/Systems/Hive" title="Data Platform/Systems/Hive">Hive</a> and has many other useful functions, particularly for graphing and statistics. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Internet_access" data-mw-thread-id="h-Internet_access"><span data-mw-comment-start="" id="h-Internet_access"></span>Internet access<span data-mw-comment-end="h-Internet_access"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Internet_access","replies":[]}}--></div> <p>You may need to access the internet from the analytics clients (for example, to download a Python script using <code>pip</code>). By default, this will fail because the machines are tightly firewalled. You'll have to use the <a href="/wiki/HTTP_proxy" title="HTTP proxy">HTTP proxy</a>. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Resource_management" data-mw-thread-id="h-Resource_management"><span data-mw-comment-start="" id="h-Resource_management"></span>Resource management<span data-mw-comment-end="h-Resource_management"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Resource_management","replies":[]}}--></div> <p>Once <a href="https://phabricator.wikimedia.org/source/operations-puppet/browse/production/modules/profile/templates/analytics/client/limits/user-resource-control.conf.erb" class="extiw" title="phab:source/operations-puppet/browse/production/modules/profile/templates/analytics/client/limits/user-resource-control.conf.erb">90% of a client's memory is consumed</a>, the topmost memory-intensive processes are killed until sufficient memory is freed up. 
Only <a href="https://phabricator.wikimedia.org/source/operations-puppet/browse/production/modules/profile/manifests/analytics/client/limits.pp" class="extiw" title="phab:source/operations-puppet/browse/production/modules/profile/manifests/analytics/client/limits.pp">90% of CPU resources</a> are available for user processes. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Local_data_storage" data-mw-thread-id="h-Local_data_storage"><span data-mw-comment-start="" id="h-Local_data_storage"></span>Local data storage<span data-mw-comment-end="h-Local_data_storage"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Local_data_storage","replies":["h-Checking_for_available_disk_space-Local_data_storage","h-Checking_the_space_used_by_your_files-Local_data_storage"]}}--></div> <p>First, note that the Analytics clients store data using redundant <a href="https://en.wikipedia.org/wiki/en:RAID" class="extiw" title="w:en:RAID">RAID</a> configurations, but are not otherwise backed up. Your home directory on HDFS (<code>/user/your-username</code>) is a safer place for important data. </p><p>Please <b>ensure that there is enough space on disk before storing big datasets/files</b>. On the Analytics clients, the home directories are stored under the /srv partition, so the command df -h should be used regularly to check space used. There are client nodes that are more crowded than other ones, so please try to use the least used client first (for example, checking with the aforementioned command which stat host has more free space). 
</p> <div class="mw-heading mw-heading3"><h3 id="Checking_for_available_disk_space" data-mw-thread-id="h-Checking_for_available_disk_space-Local_data_storage"><span data-mw-comment-start="" id="h-Checking_for_available_disk_space-Local_data_storage"></span>Checking for available disk space<span data-mw-comment-end="h-Checking_for_available_disk_space-Local_data_storage"></span></h3></div> <p>On all the Analytics clients the home directories are stored under the /srv partition, so the command df -h should be used regularly to check space used. There are client nodes that are more crowded than other ones, so please try to use the least used client first (for example, checking with the aforementioned command which stat host has more free space). </p><p>Here is an example to clarify the last point, using the stat1007 host: </p> <div class="mw-highlight mw-highlight-lang-bash mw-content-ltr" dir="ltr"><pre><span></span>elukey@stat1007:~$<span class="w"> </span>df<span class="w"> </span>-h Filesystem<span class="w"> </span>Size<span class="w"> </span>Used<span class="w"> </span>Avail<span class="w"> </span>Use%<span class="w"> </span>Mounted<span class="w"> </span>on udev<span class="w"> </span>32G<span class="w"> </span><span class="m">0</span><span class="w"> </span>32G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/dev tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span>666M<span class="w"> </span><span class="m">5</span>.7G<span class="w"> </span><span class="m">11</span>%<span class="w"> </span>/run /dev/md0<span class="w"> </span>92G<span class="w"> </span>16G<span class="w"> </span>71G<span class="w"> </span><span class="m">19</span>%<span class="w"> </span>/ tmpfs<span class="w"> </span>32G<span class="w"> </span><span class="m">1</span>.2M<span class="w"> </span>32G<span class="w"> </span><span class="m">1</span>%<span class="w"> </span>/dev/shm tmpfs<span class="w"> </span><span class="m">5</span>.0M<span 
class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">5</span>.0M<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/lock tmpfs<span class="w"> </span>32G<span class="w"> </span><span class="m">0</span><span class="w"> </span>32G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/sys/fs/cgroup /dev/mapper/stat1007--vg-data<span class="w"> </span><span class="m">7</span>.2T<span class="w"> </span><span class="m">6</span>.4T<span class="w"> </span>404G<span class="w"> </span><span class="m">95</span>%<span class="w"> </span>/srv<span class="w"> </span>&lt;&lt;<span class="o">=====================================</span><span class="s">&lt;&lt;</span> <span class="s">tmpfs 6.3G 0 6.3G 0% /run/user/3088</span> <span class="s">tmpfs</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/13926 tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/20171 fuse_dfs<span class="w"> </span><span class="m">2</span>.3P<span class="w"> </span><span class="m">1</span>.8P<span class="w"> </span>511T<span class="w"> </span><span class="m">78</span>%<span class="w"> </span>/mnt/hdfs tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/18005 tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span>32K<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">1</span>%<span class="w"> </span>/run/user/17677 
labstore1006.wikimedia.org:/srv/dumps/xmldatadumps/public<span class="w"> </span>98T<span class="w"> </span>59T<span class="w"> </span>35T<span class="w"> </span><span class="m">64</span>%<span class="w"> </span>/mnt/nfs/dumps-labstore1006.wikimedia.org labstore1007.wikimedia.org:/<span class="w"> </span>97T<span class="w"> </span>65T<span class="w"> </span>28T<span class="w"> </span><span class="m">70</span>%<span class="w"> </span>/mnt/nfs/dumps-labstore1007.wikimedia.org tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/22235 tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/22071 tmpfs<span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="m">6</span>.3G<span class="w"> </span><span class="m">0</span>%<span class="w"> </span>/run/user/10668 </pre></div> <p>In this case, the /srv partition is almost full, so it is better to look for another stat1xxx host. 
</p> <div class="mw-heading mw-heading3"><h3 id="Checking_the_space_used_by_your_files" data-mw-thread-id="h-Checking_the_space_used_by_your_files-Local_data_storage"><span data-mw-comment-start="" id="h-Checking_the_space_used_by_your_files-Local_data_storage"></span>Checking the space used by your files<span data-mw-comment-end="h-Checking_the_space_used_by_your_files-Local_data_storage"></span></h3></div> <p>It is sufficient to ssh to the host that you want to check and execute the following: </p> <div class="mw-highlight mw-highlight-lang-bash mw-content-ltr" dir="ltr"><pre><span></span><span class="c1"># Ensure that I am in my home directory, usually /home/your-username</span> <span class="c1"># if not, please do cd /home/your-username</span> elukey@stat1007:~$<span class="w"> </span><span class="nb">pwd</span> /home/elukey elukey@stat1007:~$<span class="w"> </span>du<span class="w"> </span>-hs 369M<span class="w"> </span>. </pre></div> <p>For a detailed view: </p> <div class="mw-highlight mw-highlight-lang-bash mw-content-ltr" dir="ltr"><pre><span></span><span class="c1"># Ensure that I am in my home directory, usually /home/your-username</span> <span class="c1"># if not, please do cd /home/your-username</span> elukey@stat1007:~$<span class="w"> </span><span class="nb">pwd</span> /home/elukey elukey@stat1007:~$<span class="w"> </span>du<span class="w"> </span>-hs<span class="w"> </span>*<span class="w"> </span><span class="p">|</span><span class="w"> </span>sort<span class="w"> </span>-h <span class="o">[</span>..<span class="o">]</span> 164K<span class="w"> </span>dump.out 648K<span class="w"> </span>eventlogging_cleaner.log <span class="m">7</span>.5M<span class="w"> </span>refinery 21M<span class="w"> </span>python_env 49M<span class="w"> </span>webrequest.stats.json 245M<span class="w"> </span>spark2-2.3.1-bin-hadoop2.6 </pre></div> <p>It is easy to have a quick view of how much data we are storing, and delete files that are not needed. 
</p><p>Alternatively, you can use the tool <a rel="nofollow" class="external text" href="https://dev.yorhel.nl/ncdu/man">ncdu</a>, which provides a curses interface and lets you navigate around the directory tree and delete files as you encounter them. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Web_publication" data-mw-thread-id="h-Web_publication"><span data-mw-comment-start="" id="h-Web_publication"></span>Web publication<span data-mw-comment-end="h-Web_publication"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Web_publication","replies":[]}}--></div> <p>If you wish to <b>publish</b> a dataset or report from one of the analytics clients, you can place it in the <code>/srv/published/</code> directory, which will make it available on the web in the equivalent place under <a class="external text" href="https://analytics.wikimedia.org/published/">analytics.wikimedia.org/published/</a>. You can find more information on <a href="/wiki/Data_Platform/Web_publication" title="Data Platform/Web publication">Data_Platform/Web publication</a>. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="GPU_usage" data-mw-thread-id="h-GPU_usage"><span data-mw-comment-start="" id="h-GPU_usage"></span>GPU usage<span data-mw-comment-end="h-GPU_usage"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-GPU_usage","replies":[]}}--></div> <p>On stat1008 and stat1010 we have deployed an AMD GPU for <a href="https://phabricator.wikimedia.org/T148843" class="extiw" title="phab:T148843">T148843</a>. The long-term plan is to make it available for all the users logging in, but for the moment its access is restricted to the POSIX group <code>gpu-testers</code> to better test it (and avoid usage contention etc.). 
Please reach out to the Analytics team if you wish to get added to the group to test the GPU for your use case. </p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Rsync_between_clients" data-mw-thread-id="h-Rsync_between_clients"><span data-mw-comment-start="" id="h-Rsync_between_clients"></span>Rsync between clients<span data-mw-comment-end="h-Rsync_between_clients"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Rsync_between_clients","replies":[]}}--></div> <p>On every stat host there is an rsync server that allows users to copy data from one host to another. A typical use case would be to move a home directory. For example, let's see how user <code>batman</code> can copy all his data <b>from stat1006</b> <b>to stat1007</b>: </p> <div class="mw-highlight mw-highlight-lang-bash mw-content-ltr" dir="ltr"><pre><span></span>batman@stat1007:~$<span class="w"> </span>rsync<span class="w"> </span>--exclude<span class="w"> </span><span class="s2">"/.*/"</span><span class="w"> </span>-av<span class="w"> </span>stat1006.eqiad.wmnet::home/batman/<span class="w"> </span>~/ </pre></div><p>The key details: </p><ul><li>The command is <b>run on the destination host</b> (stat1007 in this case)</li> <li><code>--exclude "/.*/"</code> excludes top-level hidden directories like <code>~/.conda</code>, <code>~/.cache</code>, and <code>~/.jupyter</code>, which usually shouldn't be copied between hosts</li> <li><code>-av stat1006.eqiad.wmnet::home/batman/</code> specifies the <b>path on the source host</b></li> <li><code>~/</code> refers to the home directory on the destination host</li> <li>This is substantially faster and more secure than using <code>scp -3</code> on your laptop (e.g. 
<code>scp -3 stat1006:/home/batman/ stat1007:/home/batman/</code>)</li></ul><p> Suppose batman needed to sync a notebook he modified on stat1007 (now the source) <b>back</b> to stat1006 (now the destination):</p><div class="mw-highlight mw-highlight-lang-bash mw-content-ltr" dir="ltr"><pre><span></span>batman@stat1006:~$<span class="w"> </span>rsync<span class="w"> </span>-av<span class="w"> </span>stat1007.eqiad.wmnet::home/batman/Untitled.ipynb<span class="w"> </span>~/Untitled.ipynb </pre></div> <p>Please note that there is a limitation - the rsync daemon runs as user <code>nobody</code>, so in order to copy data the home directory files must have permissions set accordingly, otherwise you'll see permission errors while copying. If you are seeing this problem and you are unsure about how to set permissions, please contact the Data Engineering team via IRC on <a href="/wiki/Libera.chat" class="mw-redirect" title="Libera.chat">Libera.chat</a> (<span style="font-family: monospace,Courier; white-space: pre-wrap !important; word-wrap: break-word; max-width: 1200px; overflow: auto;"><a href="ircs://irc.libera.chat/wikimedia-analytics" class="extiw" title="liberachat:wikimedia-analytics">#wikimedia-analytics</a></span> <sup class="plainlinks"><a rel="nofollow" class="external text" href="https://web.libera.chat/?#wikimedia-analytics"><span style="color:green;">connect</span></a></sup>) or open a Phabricator task with the tag "Data-Engineering". 
</p> <div class="mw-heading mw-heading2 ext-discussiontools-init-section"><h2 id="Common_workflows_(WIP)" data-mw-thread-id="h-Common_workflows_(WIP)"><span id="Common_workflows_.28WIP.29"></span><span data-mw-comment-start="" id="h-Common_workflows_(WIP)"></span>Common workflows (WIP)<span data-mw-comment-end="h-Common_workflows_(WIP)"></span></h2><!--__DTELLIPSISBUTTON__{"threadItem":{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Common_workflows_(WIP)","replies":["h-Spark_jobs-Common_workflows_(WIP)"]}}--></div> <p>This section is mostly written to help SREs understand how the hosts are used. Feel free to update this with your workflows! </p> <div class="mw-heading mw-heading3"><h3 id="Spark_jobs" data-mw-thread-id="h-Spark_jobs-Common_workflows_(WIP)"><span data-mw-comment-start="" id="h-Spark_jobs-Common_workflows_(WIP)"></span>Spark jobs<span data-mw-comment-end="h-Spark_jobs-Common_workflows_(WIP)"></span></h3></div> <ul><li>Fast-running jobs (1 hr or less): these are cached on the Spark workers or held in RAM on the stat hosts.</li> <li>Expensive jobs (&gt; 1 hr): these write to the disk (that way, even if the notebook/server stops, the Spark job will still complete and write the output). With this approach, the state can be easily recovered. 
The disadvantage is that you have to manually clean up your storage.</li></ul> <!-- NewPP limit report Parsed by mw‐web.eqiad.main‐7d588db968‐xr4gw Cached time: 20241104231403 Cache expiry: 2592000 Reduced expiry: false Complications: [show‐toc] DiscussionTools time usage: 0.018 seconds CPU time usage: 0.062 seconds Real time usage: 0.078 seconds Preprocessor visited node count: 299/1000000 Post‐expand include size: 12837/2097152 bytes Template argument size: 6531/2097152 bytes Highest expansion depth: 7/100 Expensive parser function count: 5/500 Unstrip recursion depth: 0/20 Unstrip post‐expand size: 9384/5000000 bytes --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 29.432 1 -total 69.93% 20.582 1 Template:Navigation_Data_Platform 58.70% 17.278 1 Template:Navigation_sidebar 10.22% 3.009 1 Template:Irc 9.74% 2.868 2 Template:Main --> <!-- Saved in parser cache with key labswiki:pcache:idhash:444523-0!canonical and timestamp 20241104231403 and revision id 2241496. 
Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://wikitech.wikimedia.org/w/index.php?title=Data_Platform/Systems/Clients&amp;oldid=2241496">https://wikitech.wikimedia.org/w/index.php?title=Data_Platform/Systems/Clients&amp;oldid=2241496</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Special:Categories" title="Special:Categories">Categories</a>: <ul><li><a href="/wiki/Category:Data_platform" title="Category:Data platform">Data platform</a></li><li><a href="/wiki/Category:Data_platform_systems" title="Category:Data platform systems">Data platform systems</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 4 November 2024, at 23:07.</li> <li id="footer-info-copyright">Text is available under the <a rel="nofollow" class="external text" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en">Creative Commons Attribution-ShareAlike License</a>; additional terms may apply. 
See <a class="external text" href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use">Terms of Use</a> for details.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Main_Page">About Wikitech</a></li> <li id="footer-places-disclaimers"><a href="https://foundation.wikimedia.org/wiki/General_disclaimer">Disclaimers</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://www.mediawiki.org/wiki/Special:MyLanguage/Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/wikitech.wikimedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//wikitech.wikimedia.org/w/index.php?title=Data_Platform/Systems/Clients&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> 
</div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-f69cdc8f6-nhjg5","wgBackendResponseTime":134,"wgDiscussionToolsPageThreads":[{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Jupyter","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Conda","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Querying_data","replies":[{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Python-Querying_data","replies":[]},{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-R-Querying_data","replies":[]}]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Internet_access","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Resource_management","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Local_data_storage","replies":[{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Checking_for_available_disk_space-Local_data_storage","replies":[]},{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Checking_the_space_used_by_your_files-Local_data_storage","replies":[]}]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Web_publication","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-GPU_usage","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Rsync_between_clients","replies":[]},{"headingLevel":2,"name":"h-","type":"heading","level":0,"id":"h-Common_workflows_(WIP)","replies":[{"headingLevel":3,"name":"h-","type":"heading","level":0,"id":"h-Spark_jobs-Common_workflows_(WIP)","replies":[]}]}],"wgPageParseReport":{"discussiontools":{"limitreport-timeusage":"0.018"},"limitreport":{"cputime":"0.062","walltime":"0.078","ppvisitednodes":{"value":299,"limit":1000000},"postexpandincludesize":{"value":12837,"limit":2097152},"templateargumentsize":{"value":6531,"limit":2097152},"expansion
depth":{"value":7,"limit":100},"expensivefunctioncount":{"value":5,"limit":500},"unstrip-depth":{"value":0,"limit":20},"unstrip-size":{"value":9384,"limit":5000000},"timingprofile":["100.00% 29.432 1 -total"," 69.93% 20.582 1 Template:Navigation_Data_Platform"," 58.70% 17.278 1 Template:Navigation_sidebar"," 10.22% 3.009 1 Template:Irc"," 9.74% 2.868 2 Template:Main"]},"cachereport":{"origin":"mw-web.eqiad.main-7d588db968-xr4gw","timestamp":"20241104231403","ttl":2592000,"transientcontent":false}}});});</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10