CINXE.COM
tf.data: Build TensorFlow input pipelines | TensorFlow Core
<!doctype html> <html lang="en" dir="ltr"> <head> <meta name="google-signin-client-id" content="157101835696-ooapojlodmuabs2do2vuhhnf90bccmoi.apps.googleusercontent.com"> <meta name="google-signin-scope" content="profile email https://www.googleapis.com/auth/developerprofiles https://www.googleapis.com/auth/developerprofiles.award"> <meta property="og:site_name" content="TensorFlow"> <meta property="og:type" content="website"><meta name="theme-color" content="#ff6f00"><meta charset="utf-8"> <meta content="IE=Edge" http-equiv="X-UA-Compatible"> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="manifest" href="/_pwa/tensorflow/manifest.json" crossorigin="use-credentials"> <link rel="preconnect" href="//www.gstatic.com" crossorigin> <link rel="preconnect" href="//fonts.gstatic.com" crossorigin> <link rel="preconnect" href="//fonts.googleapis.com" crossorigin> <link rel="preconnect" href="//apis.google.com" crossorigin> <link rel="preconnect" href="//www.google-analytics.com" crossorigin><link rel="stylesheet" href="//fonts.googleapis.com/css?family=Google+Sans:400,500|Roboto:400,400italic,500,500italic,700,700italic|Roboto+Mono:400,500,700&display=swap"> <link rel="stylesheet" href="//fonts.googleapis.com/css2?family=Material+Icons&family=Material+Symbols+Outlined&display=block"><link rel="stylesheet" href="https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/css/app.css"> <link rel="shortcut icon" href="https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/favicon.png"> <link rel="apple-touch-icon" href="https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/apple-touch-icon-180x180.png"><link rel="canonical" href="https://www.tensorflow.org/guide/data"><link rel="search" type="application/opensearchdescription+xml" 
title="TensorFlow" href="https://www.tensorflow.org/s/opensearch.xml"> <link rel="alternate" hreflang="en" href="https://www.tensorflow.org/guide/data" /><link rel="alternate" hreflang="x-default" href="https://www.tensorflow.org/guide/data" /><link rel="alternate" hreflang="ar" href="https://www.tensorflow.org/guide/data?hl=ar" /><link rel="alternate" hreflang="bn" href="https://www.tensorflow.org/guide/data?hl=bn" /><link rel="alternate" hreflang="zh-Hans" href="https://www.tensorflow.org/guide/data?hl=zh-cn" /><link rel="alternate" hreflang="fa" href="https://www.tensorflow.org/guide/data?hl=fa" /><link rel="alternate" hreflang="fr" href="https://www.tensorflow.org/guide/data?hl=fr" /><link rel="alternate" hreflang="he" href="https://www.tensorflow.org/guide/data?hl=he" /><link rel="alternate" hreflang="hi" href="https://www.tensorflow.org/guide/data?hl=hi" /><link rel="alternate" hreflang="id" href="https://www.tensorflow.org/guide/data?hl=id" /><link rel="alternate" hreflang="it" href="https://www.tensorflow.org/guide/data?hl=it" /><link rel="alternate" hreflang="ja" href="https://www.tensorflow.org/guide/data?hl=ja" /><link rel="alternate" hreflang="ko" href="https://www.tensorflow.org/guide/data?hl=ko" /><link rel="alternate" hreflang="pl" href="https://www.tensorflow.org/guide/data?hl=pl" /><link rel="alternate" hreflang="pt-BR" href="https://www.tensorflow.org/guide/data?hl=pt-br" /><link rel="alternate" hreflang="ru" href="https://www.tensorflow.org/guide/data?hl=ru" /><link rel="alternate" hreflang="es-419" href="https://www.tensorflow.org/guide/data?hl=es-419" /><link rel="alternate" hreflang="th" href="https://www.tensorflow.org/guide/data?hl=th" /><link rel="alternate" hreflang="tr" href="https://www.tensorflow.org/guide/data?hl=tr" /><link rel="alternate" hreflang="vi" href="https://www.tensorflow.org/guide/data?hl=vi" /><title>tf.data: Build TensorFlow input pipelines | TensorFlow Core</title> <meta property="og:title" content="tf.data: Build 
TensorFlow input pipelines | TensorFlow Core"><meta property="og:url" content="https://www.tensorflow.org/guide/data"><meta property="og:image" content="https://www.tensorflow.org/static/images/tf_logo_social.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="675"><meta property="og:locale" content="en"><meta name="twitter:card" content="summary_large_image"><script type="application/ld+json"> { "@context": "https://schema.org", "@type": "Article", "headline": "tf.data: Build TensorFlow input pipelines" } </script><script type="application/ld+json"> { "@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{ "@type": "ListItem", "position": 1, "name": "TensorFlow Core", "item": "https://www.tensorflow.org/tutorials" },{ "@type": "ListItem", "position": 2, "name": "tf.data: Build TensorFlow input pipelines", "item": "https://www.tensorflow.org/guide/data" }] } </script> <link rel="stylesheet" href="/extras.css"></head> <body class="" template="page" theme="tensorflow-theme" type="article" layout="docs" display-toc pending> <devsite-progress type="indeterminate" id="app-progress"></devsite-progress> <section class="devsite-wrapper"> <devsite-cookie-notification-bar></devsite-cookie-notification-bar><devsite-header role="banner"> <div class="devsite-header--inner nocontent"> <div class="devsite-top-logo-row-wrapper-wrapper"> <div class="devsite-top-logo-row-wrapper"> <div class="devsite-top-logo-row"> <button type="button" id="devsite-hamburger-menu" class="devsite-header-icon-button button-flat material-icons gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Navigation menu button" visually-hidden aria-label="Open menu"> </button> <div class="devsite-product-name-wrapper"> <a href="/" class="devsite-site-logo-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Site logo" track-type="globalNav" track-name="tensorFlow" track-metadata-position="nav" 
track-metadata-eventDetail="nav"> <picture> <img src="https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/lockup.svg" class="devsite-site-logo" alt="TensorFlow"> </picture> </a> <span class="devsite-product-name"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> </li> </ul> </span> </div> <div class="devsite-top-logo-row-middle"> <div class="devsite-header-upper-tabs"> <devsite-tabs class="upper-tabs"> <nav class="devsite-tabs-wrapper" aria-label="Upper tabs"> <tab > <a href="https://www.tensorflow.org/install" track-metadata-eventdetail="https://www.tensorflow.org/install" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - install" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Install" track-name="install" > Install </a> </tab> <tab class="devsite-dropdown devsite-active "> <a href="https://www.tensorflow.org/learn" track-metadata-eventdetail="https://www.tensorflow.org/learn" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - learn" track-metadata-module="primary nav" aria-label="Learn, selected" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" > Learn </a> <a href="#" role="button" aria-haspopup="true" aria-expanded="false" aria-label="Dropdown menu for Learn" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/learn" track-metadata-position="nav - learn" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></a> <div class="devsite-tabs-dropdown" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column tfo-menu-column-learn"> <ul class="devsite-tabs-dropdown-section "> <li 
class="devsite-nav-item"> <a href="https://www.tensorflow.org/learn" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/learn" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Introduction </div> <div class="devsite-nav-item-description"> New to TensorFlow? </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/tutorials" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tutorials" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Tutorials </div> <div class="devsite-nav-item-description"> Learn how to use TensorFlow with end-to-end examples </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/guide" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/guide" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Guide </div> <div class="devsite-nav-item-description"> Learn framework concepts and components </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/learn-ml" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/learn-ml" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Learn ML </div> <div class="devsite-nav-item-description"> Educational resources to master your path with TensorFlow </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/api" track-metadata-eventdetail="https://www.tensorflow.org/api" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - api" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" > API 
</a> <a href="#" role="button" aria-haspopup="true" aria-expanded="false" aria-label="Dropdown menu for API" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/api" track-metadata-position="nav - api" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></a> <div class="devsite-tabs-dropdown" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/api/stable" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/api/stable" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow (v2.16.1) </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/versions" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/versions" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Versions… </div> </a> </li> </ul> </div> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://js.tensorflow.org/api/latest/" track-type="nav" track-metadata-eventdetail="https://js.tensorflow.org/api/latest/" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow.js </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/lite/api_docs" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/lite/api_docs" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow Lite </div> </a> </li> <li 
class="devsite-nav-item"> <a href="https://www.tensorflow.org/tfx/api_docs" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tfx/api_docs" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TFX </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/resources/models-datasets" track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - ecosystem" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" > Ecosystem </a> <a href="#" role="button" aria-haspopup="true" aria-expanded="false" aria-label="Dropdown menu for Ecosystem" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" track-metadata-position="nav - ecosystem" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></a> <div class="devsite-tabs-dropdown" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-title" role="heading" tooltip>LIBRARIES</li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/js" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/js" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TensorFlow.js </div> <div class="devsite-nav-item-description"> Develop web ML applications in JavaScript </div> </a> </li> <li class="devsite-nav-item"> <a 
href="https://www.tensorflow.org/lite" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/lite" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TensorFlow Lite </div> <div class="devsite-nav-item-description"> Deploy ML on mobile, microcontrollers and other edge devices </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/tfx" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tfx" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TFX </div> <div class="devsite-nav-item-description"> Build production ML pipelines </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/libraries-extensions" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/libraries-extensions" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> All libraries </div> <div class="devsite-nav-item-description"> Create advanced models and extend TensorFlow </div> </a> </li> </ul> </div> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-title" role="heading" tooltip>RESOURCES</li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/models-datasets" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Models & datasets </div> <div class="devsite-nav-item-description"> Pre-trained models and datasets built by Google and 
the community </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/tools" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/tools" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Tools </div> <div class="devsite-nav-item-description"> Tools to support and accelerate TensorFlow workflows </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/responsible_ai" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/responsible_ai" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Responsible AI </div> <div class="devsite-nav-item-description"> Resources for every stage of the ML workflow </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/recommendation-systems" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/recommendation-systems" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Recommendation systems </div> <div class="devsite-nav-item-description"> Build recommendation systems with open source tools </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/community" track-metadata-eventdetail="https://www.tensorflow.org/community" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - community" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" > Community </a> <a href="#" role="button" aria-haspopup="true" aria-expanded="false" 
aria-label="Dropdown menu for Community" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community" track-metadata-position="nav - community" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></a> <div class="devsite-tabs-dropdown" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/community/groups" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community/groups" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Groups </div> <div class="devsite-nav-item-description"> User groups, interest groups and mailing lists </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/community/contribute" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community/contribute" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Contribute </div> <div class="devsite-nav-item-description"> Guide for contributing to code and documentation </div> </a> </li> <li class="devsite-nav-item"> <a href="https://blog.tensorflow.org/" track-type="nav" track-metadata-eventdetail="https://blog.tensorflow.org/" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Blog </div> <div class="devsite-nav-item-description"> Stay up to date with all things TensorFlow </div> </a> </li> <li class="devsite-nav-item"> <a href="https://discuss.tensorflow.org" track-type="nav" track-metadata-eventdetail="https://discuss.tensorflow.org" track-metadata-position="nav - 
community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Forum </div> <div class="devsite-nav-item-description"> Discussion platform for the TensorFlow community </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/about" track-metadata-eventdetail="https://www.tensorflow.org/about" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - why tensorflow" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" > Why TensorFlow </a> <a href="#" role="button" aria-haspopup="true" aria-expanded="false" aria-label="Dropdown menu for Why TensorFlow" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about" track-metadata-position="nav - why tensorflow" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></a> <div class="devsite-tabs-dropdown" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/about" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about" track-metadata-position="nav - why tensorflow" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> About </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/about/case-studies" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about/case-studies" track-metadata-position="nav - why tensorflow" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Case studies </div> </a> </li> </ul> </div> </div> </div> </tab> 
</nav> </devsite-tabs> </div> <devsite-search enable-signin enable-search enable-suggestions enable-query-completion project-name="TensorFlow Core" tenant-name="TensorFlow" > <form class="devsite-search-form" action="https://www.tensorflow.org/s/results" method="GET"> <div class="devsite-search-container"> <button type="button" search-open class="devsite-search-button devsite-header-icon-button button-flat material-icons" aria-label="Open search"></button> <div class="devsite-searchbox"> <input aria-activedescendant="" aria-autocomplete="list" aria-label="Search" aria-expanded="false" aria-haspopup="listbox" autocomplete="off" class="devsite-search-field devsite-search-query" name="q" placeholder="Search" role="combobox" type="text" value="" > <div class="devsite-search-image material-icons" aria-hidden="true"> </div> <div class="devsite-search-shortcut-icon-container" aria-hidden="true"> <kbd class="devsite-search-shortcut-icon">/</kbd> </div> </div> </div> </form> <button type="button" search-close class="devsite-search-button devsite-header-icon-button button-flat material-icons" aria-label="Close search"></button> </devsite-search> </div> <devsite-language-selector> <ul role="presentation"> <li role="presentation"> <a role="menuitem" lang="en" >English</a> </li> <li role="presentation"> <a role="menuitem" lang="es_419" >Español – América Latina</a> </li> <li role="presentation"> <a role="menuitem" lang="fr" >Français</a> </li> <li role="presentation"> <a role="menuitem" lang="id" >Indonesia</a> </li> <li role="presentation"> <a role="menuitem" lang="it" >Italiano</a> </li> <li role="presentation"> <a role="menuitem" lang="pl" >Polski</a> </li> <li role="presentation"> <a role="menuitem" lang="pt_br" >Português – Brasil</a> </li> <li role="presentation"> <a role="menuitem" lang="vi" >Tiếng Việt</a> </li> <li role="presentation"> <a role="menuitem" lang="tr" >Türkçe</a> </li> <li role="presentation"> <a role="menuitem" lang="ru" >Русский</a> </li> <li 
role="presentation"> <a role="menuitem" lang="he" >עברית</a> </li> <li role="presentation"> <a role="menuitem" lang="ar" >العربيّة</a> </li> <li role="presentation"> <a role="menuitem" lang="fa" >فارسی</a> </li> <li role="presentation"> <a role="menuitem" lang="hi" >हिंदी</a> </li> <li role="presentation"> <a role="menuitem" lang="bn" >বাংলা</a> </li> <li role="presentation"> <a role="menuitem" lang="th" >ภาษาไทย</a> </li> <li role="presentation"> <a role="menuitem" lang="zh_cn" >中文 – 简体</a> </li> <li role="presentation"> <a role="menuitem" lang="ja" >日本語</a> </li> <li role="presentation"> <a role="menuitem" lang="ko" >한국어</a> </li> </ul> </devsite-language-selector> <a class="devsite-header-link devsite-top-button button gc-analytics-event" href="//github.com/tensorflow" data-category="Site-Wide Custom Events" data-label="Site header link" > GitHub </a> <devsite-user enable-profiles id="devsite-user"> <span class="button devsite-top-button" aria-hidden="true" visually-hidden>Sign in</span> </devsite-user> </div> </div> </div> <div class="devsite-collapsible-section "> <div class="devsite-header-background"> <div class="devsite-product-id-row" > <div class="devsite-product-description-row"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> <a href="https://www.tensorflow.org/tutorials" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Lower Header" data-value="1" track-type="globalNav" track-name="breadcrumb" track-metadata-position="1" track-metadata-eventdetail="TensorFlow Core" > TensorFlow Core </a> </li> </ul> </div> </div> <div class="devsite-doc-set-nav-row"> <devsite-tabs class="lower-tabs"> <nav class="devsite-tabs-wrapper" aria-label="Lower tabs"> <tab > <a href="https://www.tensorflow.org/tutorials" track-metadata-eventdetail="https://www.tensorflow.org/tutorials" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - tutorials" 
track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Tutorials" track-name="tutorials" > Tutorials </a> </tab> <tab class="devsite-active"> <a href="https://www.tensorflow.org/guide" track-metadata-eventdetail="https://www.tensorflow.org/guide" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - guide" track-metadata-module="primary nav" aria-label="Guide, selected" data-category="Site-Wide Custom Events" data-label="Tab: Guide" track-name="guide" > Guide </a> </tab> <tab > <a href="https://www.tensorflow.org/guide/migrate" track-metadata-eventdetail="https://www.tensorflow.org/guide/migrate" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - migrate to tf2" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Migrate to TF2" track-name="migrate to tf2" > Migrate to TF2 </a> </tab> <tab > <a href="https://github.com/tensorflow/docs/tree/master/site/en/r1" track-metadata-eventdetail="https://github.com/tensorflow/docs/tree/master/site/en/r1" class="devsite-tabs-content gc-analytics-event " track-type="nav" track-metadata-position="nav - tf 1 ↗" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: TF 1 ↗" track-name="tf 1 ↗" > TF 1 ↗ </a> </tab> </nav> </devsite-tabs> </div> </div> </div> </div> </devsite-header> <devsite-book-nav scrollbars > <div class="devsite-book-nav-filter" > <span class="filter-list-icon material-icons" aria-hidden="true"></span> <input type="text" placeholder="Filter" aria-label="Type to filter" role="searchbox"> <span class="filter-clear-button hidden" data-title="Clear filter" aria-label="Clear filter" role="button" tabindex="0"></span> </div> <nav class="devsite-book-nav devsite-nav nocontent" aria-label="Side menu"> <div class="devsite-mobile-header"> <button type="button" id="devsite-close-nav" class="devsite-header-icon-button 
button-flat material-icons gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Close navigation" aria-label="Close navigation"> </button> <div class="devsite-product-name-wrapper"> <a href="/" class="devsite-site-logo-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Site logo" track-type="globalNav" track-name="tensorFlow" track-metadata-position="nav" track-metadata-eventDetail="nav"> <picture> <img src="https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/lockup.svg" class="devsite-site-logo" alt="TensorFlow"> </picture> </a> <span class="devsite-product-name"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> </li> </ul> </span> </div> </div> <div class="devsite-book-nav-wrapper"> <div class="devsite-mobile-nav-top"> <ul class="devsite-nav-list"> <li class="devsite-nav-item"> <a href="/install" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Install" track-name="install" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Install" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Install </span> <span class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a href="/learn" class="devsite-nav-title gc-analytics-event devsite-nav-active" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Learn" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Learn </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip 
data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" > <span class="devsite-nav-text" tooltip menu="Learn"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Learn"> </span> </span> </li> </ul> <ul class="devsite-nav-responsive-tabs"> <li class="devsite-nav-item"> <a href="/tutorials" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Tutorials" track-name="tutorials" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tutorials" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tutorials </span> <span class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide" class="devsite-nav-title gc-analytics-event devsite-nav-has-children devsite-nav-active" data-category="Site-Wide Custom Events" data-label="Tab: Guide" track-name="guide" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Guide" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip menu="_book"> Guide </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="_book"> </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide/migrate" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Migrate to TF2" track-name="migrate to tf2" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Migrate to TF2" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Migrate to TF2 </span> <span class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a 
href="https://github.com/tensorflow/docs/tree/master/site/en/r1" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: TF 1 ↗" track-name="tf 1 ↗" data-category="Site-Wide Custom Events" data-label="Responsive Tab: TF 1 ↗" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TF 1 ↗ </span> </a> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/api" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" data-category="Site-Wide Custom Events" data-label="Responsive Tab: API" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > API </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" > <span class="devsite-nav-text" tooltip menu="API"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="API"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/resources/models-datasets" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Ecosystem" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Ecosystem </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" > <span class="devsite-nav-text" tooltip menu="Ecosystem"> More </span> <span class="devsite-nav-icon 
material-icons" data-icon="forward" menu="Ecosystem"> </span> </span> </li> </ul> </li> <!-- Tab links carry a single data-category/data-label pair (a repeated attribute is dropped by the parser, first one wins); the external GitHub link uses an explicit https: scheme instead of a protocol-relative URL. --> <li class="devsite-nav-item"> <a href="/community" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Community </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" > <span class="devsite-nav-text" tooltip menu="Community"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Community"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/about" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Why TensorFlow </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" > <span class="devsite-nav-text" tooltip menu="Why TensorFlow"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Why TensorFlow"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="https://github.com/tensorflow" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: GitHub" 
track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > GitHub </span> </a> </li> </ul> </div> <div class="devsite-mobile-nav-bottom"> <ul class="devsite-nav-list" menu="_book"> <li class="devsite-nav-item"><a href="/guide" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide" ><span class="devsite-nav-text" tooltip>TensorFlow guide</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>TensorFlow basics</span> </div></li> <li class="devsite-nav-item"><a href="/guide/basics" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/basics" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/basics" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/guide/tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tensor" ><span class="devsite-nav-text" tooltip>Tensors</span></a></li> <li class="devsite-nav-item"><a href="/guide/variable" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/variable" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/variable" ><span class="devsite-nav-text" tooltip>Variables</span></a></li> <li class="devsite-nav-item"><a href="/guide/autodiff" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/autodiff" track-type="bookNav" 
track-name="click" track-metadata-eventdetail="/guide/autodiff" ><span class="devsite-nav-text" tooltip>Automatic differentiation</span></a></li> <li class="devsite-nav-item"><a href="/guide/intro_to_graphs" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/intro_to_graphs" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/intro_to_graphs" ><span class="devsite-nav-text" tooltip>Graphs and functions</span></a></li> <li class="devsite-nav-item"><a href="/guide/intro_to_modules" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/intro_to_modules" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/intro_to_modules" ><span class="devsite-nav-text" tooltip>Modules, layers, and models</span></a></li> <li class="devsite-nav-item"><a href="/guide/basic_training_loops" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/basic_training_loops" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/basic_training_loops" ><span class="devsite-nav-text" tooltip>Training loops</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Keras</span> </div></li> <li class="devsite-nav-item"><a href="/guide/keras" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/sequential_model" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: 
/guide/keras/sequential_model" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/sequential_model" ><span class="devsite-nav-text" tooltip>The Sequential model</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/functional_api" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/functional_api" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/functional_api" ><span class="devsite-nav-text" tooltip>The Functional API</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/training_with_built_in_methods" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/training_with_built_in_methods" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/training_with_built_in_methods" ><span class="devsite-nav-text" tooltip>Training & evaluation with the built-in methods</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/making_new_layers_and_models_via_subclassing" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/making_new_layers_and_models_via_subclassing" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/making_new_layers_and_models_via_subclassing" ><span class="devsite-nav-text" tooltip>Making new layers and models via subclassing</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/serialization_and_saving" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/serialization_and_saving" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/serialization_and_saving" ><span class="devsite-nav-text" tooltip>Serialization and saving</span></a></li> <li 
class="devsite-nav-item"><a href="/guide/keras/customizing_saving_and_serialization" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/customizing_saving_and_serialization" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/customizing_saving_and_serialization" ><span class="devsite-nav-text" tooltip>Customizing Saving</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/preprocessing_layers" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/preprocessing_layers" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/preprocessing_layers" ><span class="devsite-nav-text" tooltip>Working with preprocessing layers</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/customizing_what_happens_in_fit" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/customizing_what_happens_in_fit" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/customizing_what_happens_in_fit" ><span class="devsite-nav-text" tooltip>Customizing what happens in fit()</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/writing_a_training_loop_from_scratch" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/writing_a_training_loop_from_scratch" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/writing_a_training_loop_from_scratch" ><span class="devsite-nav-text" tooltip>Writing a training loop from scratch</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/working_with_rnns" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: 
/guide/keras/working_with_rnns" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/working_with_rnns" ><span class="devsite-nav-text" tooltip>Working with RNNs</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/understanding_masking_and_padding" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/understanding_masking_and_padding" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/understanding_masking_and_padding" ><span class="devsite-nav-text" tooltip>Understanding masking & padding</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/writing_your_own_callbacks" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/writing_your_own_callbacks" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/writing_your_own_callbacks" ><span class="devsite-nav-text" tooltip>Writing your own callbacks</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/transfer_learning" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/transfer_learning" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/transfer_learning" ><span class="devsite-nav-text" tooltip>Transfer learning & fine-tuning</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/distributed_training" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/distributed_training" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/distributed_training" ><span class="devsite-nav-text" tooltip>Multi-GPU and distributed training</span></a></li> <li class="devsite-nav-item devsite-nav-heading 
devsite-nav-new"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Build with Core</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/guide/core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/quickstart_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/quickstart_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/quickstart_core" ><span class="devsite-nav-text" tooltip>Quickstart for Core</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/logistic_regression_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/logistic_regression_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/logistic_regression_core" ><span class="devsite-nav-text" tooltip>Logistic regression</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/mlp_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/mlp_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/mlp_core" ><span class="devsite-nav-text" tooltip>Multilayer perceptrons</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/matrix_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/matrix_core" track-type="bookNav" 
track-name="click" track-metadata-eventdetail="/guide/core/matrix_core" ><span class="devsite-nav-text" tooltip>Matrix approximation</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/optimizers_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/optimizers_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/optimizers_core" ><span class="devsite-nav-text" tooltip>Custom optimizers</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/core/distribution" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/distribution" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/distribution" ><span class="devsite-nav-text" tooltip>DTensor with Core APIs</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>TensorFlow in depth</span> </div></li> <li class="devsite-nav-item"><a href="/guide/tensor_slicing" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tensor_slicing" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tensor_slicing" ><span class="devsite-nav-text" tooltip>Tensor slicing</span></a></li> <li class="devsite-nav-item"><a href="/guide/advanced_autodiff" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/advanced_autodiff" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/advanced_autodiff" ><span class="devsite-nav-text" tooltip>Advanced autodiff</span></a></li> <li class="devsite-nav-item"><a href="/guide/ragged_tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/ragged_tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/ragged_tensor" ><span class="devsite-nav-text" tooltip>Ragged tensor</span></a></li> <li class="devsite-nav-item"><a href="/guide/sparse_tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/sparse_tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/sparse_tensor" ><span class="devsite-nav-text" tooltip>Sparse tensor</span></a></li> <li class="devsite-nav-item"><a href="/guide/random_numbers" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/random_numbers" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/random_numbers" 
><span class="devsite-nav-text" tooltip>Random number generation</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/tf_numpy" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tf_numpy" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tf_numpy" ><span class="devsite-nav-text" tooltip>NumPy API</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-nightly"><a href="/guide/tf_numpy_type_promotion" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tf_numpy_type_promotion" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tf_numpy_type_promotion" ><span class="devsite-nav-text" tooltip>NumPy API Type Promotion</span><span class="devsite-nav-icon material-icons" data-icon="nightly" data-title="Nightly build only" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/dtensor_overview" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/dtensor_overview" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/dtensor_overview" ><span class="devsite-nav-text" tooltip>DTensor concepts</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item"><a href="/guide/effective_tf2" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/effective_tf2" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/effective_tf2" ><span class="devsite-nav-text" tooltip>Thinking in TensorFlow 2</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Customization</span> </div></li> <li class="devsite-nav-item"><a href="/guide/create_op" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/create_op" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/create_op" ><span class="devsite-nav-text" tooltip>Create an op</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/extension_type" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/extension_type" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/extension_type" ><span class="devsite-nav-text" tooltip>Extension types</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Data input pipelines</span> </div></li> <li class="devsite-nav-item"><a href="/guide/data" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data" ><span class="devsite-nav-text" tooltip>tf.data</span></a></li> <li class="devsite-nav-item"><a href="/guide/data_performance" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data_performance" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data_performance" ><span class="devsite-nav-text" tooltip>Optimize pipeline performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/data_performance_analysis" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data_performance_analysis" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data_performance_analysis" ><span class="devsite-nav-text" tooltip>Analyze pipeline performance</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Import and export</span> </div></li> <li class="devsite-nav-item"><a href="/guide/checkpoint" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/checkpoint" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/checkpoint" ><span class="devsite-nav-text" tooltip>Checkpoint</span></a></li> <li class="devsite-nav-item"><a href="/guide/saved_model" class="devsite-nav-title gc-analytics-event" 
data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/saved_model" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/saved_model" ><span class="devsite-nav-text" tooltip>SavedModel</span></a></li> <li class="devsite-nav-item devsite-nav-new"><a href="/guide/jax2tf" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/jax2tf" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/jax2tf" ><span class="devsite-nav-text" tooltip>Import a JAX model using JAX2TF</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Accelerators</span> </div></li> <li class="devsite-nav-item"><a href="/guide/distributed_training" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/distributed_training" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/distributed_training" ><span class="devsite-nav-text" tooltip>Distributed training</span></a></li> <li class="devsite-nav-item"><a href="/guide/gpu" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/gpu" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/gpu" ><span class="devsite-nav-text" tooltip>GPU</span></a></li> <li class="devsite-nav-item"><a href="/guide/tpu" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tpu" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tpu" ><span class="devsite-nav-text" tooltip>TPU</span></a></li> <li class="devsite-nav-item 
devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Performance</span> </div></li> <li class="devsite-nav-item"><a href="/guide/function" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/function" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/function" ><span class="devsite-nav-text" tooltip>Better performance with tf.function</span></a></li> <li class="devsite-nav-item"><a href="/guide/profiler" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/profiler" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/profiler" ><span class="devsite-nav-text" tooltip>Profile TensorFlow performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/gpu_performance_analysis" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/gpu_performance_analysis" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/gpu_performance_analysis" ><span class="devsite-nav-text" tooltip>Optimize GPU Performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/graph_optimization" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/graph_optimization" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/graph_optimization" ><span class="devsite-nav-text" tooltip>Graph optimization</span></a></li> <li class="devsite-nav-item"><a href="/guide/mixed_precision" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/mixed_precision" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/mixed_precision" ><span 
class="devsite-nav-text" tooltip>Mixed precision</span></a></li> <li class="devsite-nav-item devsite-nav-heading devsite-nav-new"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Model Garden</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/tfmodels" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/orbit" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/orbit" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/orbit" ><span class="devsite-nav-text" tooltip>Training with Orbit</span></a></li> <li class="devsite-nav-item devsite-nav-external"><a href="/tfmodels/nlp" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/nlp" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/nlp" ><span class="devsite-nav-text" tooltip>TFModels - NLP</span><span class="devsite-nav-icon material-icons" data-icon="external" data-title="External" aria-hidden="true"></span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/image_classification" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/image_classification" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/image_classification" ><span class="devsite-nav-text" tooltip>Example: Image classification</span></a></li> <li class="devsite-nav-item"><a 
href="/tfmodels/vision/object_detection" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/object_detection" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/object_detection" ><span class="devsite-nav-text" tooltip>Example: Object Detection</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/semantic_segmentation" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/semantic_segmentation" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/semantic_segmentation" ><span class="devsite-nav-text" tooltip>Example: Semantic Segmentation</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/instance_segmentation" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/instance_segmentation" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/instance_segmentation" ><span class="devsite-nav-text" tooltip>Example: Instance Segmentation</span></a></li> <li class="devsite-nav-item devsite-nav-heading devsite-nav-deprecated"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Estimators</span><span class="devsite-nav-icon material-icons" data-icon="deprecated" data-title="Deprecated" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/guide/estimator" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/estimator" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/estimator" ><span class="devsite-nav-text" tooltip>Estimator overview</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div 
class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Appendix</span> </div></li> <li class="devsite-nav-item"><a href="/guide/versions" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/versions" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/versions" ><span class="devsite-nav-text" tooltip>Version compatibility</span></a></li> </ul> <ul class="devsite-nav-list" menu="Learn" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/learn" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Introduction" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Introduction </span> </a> </li> <li class="devsite-nav-item"> <a href="/tutorials" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tutorials" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tutorials </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Guide" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Guide </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/learn-ml" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Learn ML" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Learn ML </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="API" aria-label="Side menu" hidden> <li 
class="devsite-nav-item"> <a href="/api/stable" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow (v2.16.1)" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow (v2.16.1) </span> </a> </li> <li class="devsite-nav-item"> <a href="/versions" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Versions…" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Versions… </span> </a> </li> <li class="devsite-nav-item"> <a href="https://js.tensorflow.org/api/latest/" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow.js" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow.js </span> </a> </li> <li class="devsite-nav-item"> <a href="/lite/api_docs" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow Lite" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow Lite </span> </a> </li> <li class="devsite-nav-item"> <a href="/tfx/api_docs" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TFX" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TFX </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Ecosystem" aria-label="Side menu" hidden> <li class="devsite-nav-item devsite-nav-heading"> <span class="devsite-nav-title" tooltip > <span class="devsite-nav-text" tooltip > LIBRARIES </span> </span> </li> 
<li class="devsite-nav-item"> <a href="/js" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow.js" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow.js </span> </a> </li> <li class="devsite-nav-item"> <a href="/lite" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow Lite" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow Lite </span> </a> </li> <li class="devsite-nav-item"> <a href="/tfx" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TFX" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TFX </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/libraries-extensions" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: All libraries" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > All libraries </span> </a> </li> <li class="devsite-nav-item devsite-nav-heading"> <span class="devsite-nav-title" tooltip > <span class="devsite-nav-text" tooltip > RESOURCES </span> </span> </li> <li class="devsite-nav-item"> <a href="/resources/models-datasets" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Models & datasets" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Models & datasets </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/tools" class="devsite-nav-title 
gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tools" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tools </span> </a> </li> <li class="devsite-nav-item"> <a href="/responsible_ai" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Responsible AI" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Responsible AI </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/recommendation-systems" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Recommendation systems" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Recommendation systems </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Community" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/community/groups" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Groups" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Groups </span> </a> </li> <li class="devsite-nav-item"> <a href="/community/contribute" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Contribute" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Contribute </span> </a> </li> <li class="devsite-nav-item"> <a href="https://blog.tensorflow.org/" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Blog" track-type="navMenu" 
track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Blog </span> </a> </li> <li class="devsite-nav-item"> <a href="https://discuss.tensorflow.org" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Forum" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Forum </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Why TensorFlow" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/about" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: About" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > About </span> </a> </li> <li class="devsite-nav-item"> <a href="/about/case-studies" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Case studies" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Case studies </span> </a> </li> </ul> </div> </div> </nav> </devsite-book-nav> <section id="gc-wrapper"> <main role="main" class="devsite-main-content" has-book-nav has-sidebar > <div class="devsite-sidebar"> <div class="devsite-sidebar-content"> <devsite-toc class="devsite-nav" role="navigation" aria-label="On this page" depth="2" scrollbars ></devsite-toc> <devsite-recommendations-sidebar class="nocontent devsite-nav"> </devsite-recommendations-sidebar> </div> </div> <devsite-content> <article class="devsite-article"><style> /* Styles inlined from /site-assets/css/style.css */ /* override theme */ table img { max-width: 100%; } /* .devsite-terminal virtualenv prompt */ .tfo-terminal-venv::before { content: "(venv) $ " !important; } /* .devsite-terminal root prompt 
*/ .tfo-terminal-root::before { content: "# " !important; } /* Used in links for type annotations in function/method signatures */ .tfo-signature-link a, .tfo-signature-link a:visited, .tfo-signature-link a:hover, .tfo-signature-link a:focus, .tfo-signature-link a:hover *, .tfo-signature-link a:focus * { text-decoration: none !important; } .tfo-signature-link a, .tfo-signature-link a:visited { border-bottom: 1px dotted #1a73e8; } .tfo-signature-link a:focus { border-bottom-style: solid; } /* .devsite-terminal Windows prompt */ .tfo-terminal-windows::before { content: "C:\\> " !important; } /* .devsite-terminal Windows prompt w/ virtualenv */ .tfo-terminal-windows-venv::before { content: "(venv) C:\\> " !important; } .tfo-diff-green-one-level + * { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-green + * > * { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-green-list + ul > li:first-of-type { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-red-one-level + * { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } .tfo-diff-red + * > * { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } .tfo-diff-red-list + ul > li:first-of-type { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } devsite-code .tfo-notebook-code-cell-output { max-height: 300px; overflow: auto; background: rgba(255, 247, 237, 1); /* orange bg to distinguish from input code cells */ } devsite-code .tfo-notebook-code-cell-output + .devsite-code-buttons-container button { background: rgba(255, 247, 237, .7); /* orange bg to distinguish from input code cells */ } devsite-code[dark-code] .tfo-notebook-code-cell-output { background: rgba(64, 78, 103, 1); /* medium slate */ } devsite-code[dark-code] .tfo-notebook-code-cell-output + .devsite-code-buttons-container button { background: rgba(64, 78, 103, .7); /* medium slate */ } /* override default table 
styles for notebook buttons */ .devsite-table-wrapper .tfo-notebook-buttons { display: inline-block; margin-left: 3px; width: auto; } .tfo-notebook-buttons td { padding-left: 0; padding-right: 20px; } .tfo-notebook-buttons a, .tfo-notebook-buttons :link, .tfo-notebook-buttons :visited { border-radius: 8px; box-shadow: 0 1px 2px 0 rgba(60, 64, 67, .3), 0 1px 3px 1px rgba(60, 64, 67, .15); color: #202124; padding: 12px 17px; transition: box-shadow 0.2s; } .tfo-notebook-buttons a:hover, .tfo-notebook-buttons a:focus { box-shadow: 0 1px 2px 0 rgba(60, 64, 67, .3), 0 2px 6px 2px rgba(60, 64, 67, .15); } .tfo-notebook-buttons tr { background: 0; border: 0; } /* on rendered notebook page, remove link to webpage since we're already here */ .tfo-notebook-buttons:not(.tfo-api) td:first-child { display: none; } .tfo-notebook-buttons td > a { -webkit-box-align: center; -ms-flex-align: center; align-items: center; display: -webkit-box; display: -ms-flexbox; display: flex; } .tfo-notebook-buttons td > a > img { margin-right: 8px; } /* landing pages */ .tfo-landing-row-item-inset-white { background-color: #fff; padding: 32px; } .tfo-landing-row-item-inset-white ol, .tfo-landing-row-item-inset-white ul { padding-left: 20px; } /* colab callout button */ .colab-callout-row devsite-code { border-radius: 8px 8px 0 0; box-shadow: none; } .colab-callout-footer { background: #e3e4e7; border-radius: 0 0 8px 8px; color: #37474f; padding: 20px; } .colab-callout-row devsite-code[dark-code] + .colab-callout-footer { background: #3f4f66; } .colab-callout-footer > .button { margin-top: 4px; color: #ff5c00; } .colab-callout-footer > a > span { vertical-align: middle; color: #37474f; padding-left: 10px; font-size: 14px; } .colab-callout-row devsite-code[dark-code] + .colab-callout-footer > a > span { color: #fff; } a.colab-button { background: rgba(255, 255, 255, .75); border: solid 1px rgba(0, 0, 0, .08); border-bottom-color: rgba(0, 0, 0, .15); border-radius: 4px; color: #aaa; display: 
inline-block; font-size: 11px !important; font-weight: 300; line-height: 16px; padding: 4px 8px; text-decoration: none; text-transform: uppercase; } a.colab-button:hover { background: white; border-color: rgba(0, 0, 0, .2); color: #666; } a.colab-button span { background: url(/images/colab_logo_button.svg) no-repeat 1px 1px / 20px; border-radius: 4px; display: inline-block; padding-left: 24px; text-decoration: none; } @media screen and (max-width: 600px) { .tfo-notebook-buttons td { display: block; } } /* guide and tutorials landing page cards and sections */ .tfo-landing-page-card { padding: 16px; box-shadow: 0 0 36px rgba(0,0,0,0.1); border-radius: 10px; } /* Page section headings */ .tfo-landing-page-heading h2, h2.tfo-landing-page-heading { font-family: "Google Sans", sans-serif; color: #425066; font-size: 30px; font-weight: 700; line-height: 40px; } /* Item title headings */ .tfo-landing-page-heading h3, h3.tfo-landing-page-heading, .tfo-landing-page-card h3, h3.tfo-landing-page-card { font-family: "Google Sans", sans-serif; color: #425066; font-size: 20px; font-weight: 500; line-height: 26px; } /* List of tutorials notebooks for subsites */ .tfo-landing-page-resources-ul { padding-left: 15px } .tfo-landing-page-resources-ul > li { margin: 6px 0; } /* Temporary fix to hide product description in header on landing pages */ devsite-header .devsite-product-description { display: none; } </style> <div class="devsite-article-meta nocontent" role="navigation"> <ul class="devsite-breadcrumb-list" aria-label="Breadcrumb"> <li class="devsite-breadcrumb-item "> <a href="https://www.tensorflow.org/" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="1" track-type="globalNav" track-name="breadcrumb" track-metadata-position="1" track-metadata-eventdetail="TensorFlow" > TensorFlow </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet material-icons" 
aria-hidden="true"></div> <a href="https://www.tensorflow.org/learn" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="2" track-type="globalNav" track-name="breadcrumb" track-metadata-position="2" track-metadata-eventdetail="" > Learn </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet material-icons" aria-hidden="true"></div> <a href="https://www.tensorflow.org/tutorials" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="3" track-type="globalNav" track-name="breadcrumb" track-metadata-position="3" track-metadata-eventdetail="TensorFlow Core" > TensorFlow Core </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet material-icons" aria-hidden="true"></div> <a href="https://www.tensorflow.org/guide" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="4" track-type="globalNav" track-name="breadcrumb" track-metadata-position="4" track-metadata-eventdetail="" > Guide </a> </li> </ul> <devsite-thumb-rating position="header"> </devsite-thumb-rating> </div> <h1 class="devsite-page-title" tabindex="-1"> tf.data: Build TensorFlow input pipelines </h1> <devsite-feature-tooltip ack-key="AckCollectionsBookmarkTooltipDismiss" analytics-category="Site-Wide Custom Events" analytics-action-show="Callout Profile displayed" analytics-action-close="Callout Profile dismissed" analytics-label="Create Collection Callout" class="devsite-page-bookmark-tooltip nocontent" dismiss-button="true" id="devsite-collections-dropdown" dismiss-button-text="Dismiss" close-button-text="Got it"> <devsite-bookmark></devsite-bookmark> <span slot="popout-heading"> Stay organized with collections </span> <span slot="popout-contents"> Save and categorize content based on your preferences. 
</span> </devsite-feature-tooltip> <div class="devsite-page-title-meta"><devsite-view-release-notes></devsite-view-release-notes></div> <devsite-toc class="devsite-nav" depth="2" devsite-toc-embedded > </devsite-toc> <div class="devsite-article-body clearfix "> <p></p> <!-- DO NOT EDIT! Automatically generated file. --> <div itemscope itemtype="http://developers.google.com/ReferenceObject"> <meta itemprop="name" content="tf.data: Build TensorFlow input pipelines" /> <meta itemprop="path" content="Guide & Tutorials" /> <meta itemprop="property" content="tf.Variable"/> <meta itemprop="property" content="tf.cast"/> <meta itemprop="property" content="tf.data.Dataset"/> <meta itemprop="property" content="tf.data.TFRecordDataset"/> <meta itemprop="property" content="tf.data.TextLineDataset"/> <meta itemprop="property" content="tf.data.experimental.Counter"/> <meta itemprop="property" content="tf.data.experimental.CsvDataset"/> <meta itemprop="property" content="tf.data.experimental.make_csv_dataset"/> <meta itemprop="property" content="tf.fill"/> <meta itemprop="property" content="tf.image.convert_image_dtype"/> <meta itemprop="property" content="tf.image.resize"/> <meta itemprop="property" content="tf.io.FixedLenFeature"/> <meta itemprop="property" content="tf.io.decode_jpeg"/> <meta itemprop="property" content="tf.io.decode_png"/> <meta itemprop="property" content="tf.io.parse_example"/> <meta itemprop="property" content="tf.io.read_file"/> <meta itemprop="property" content="tf.keras.Sequential"/> <meta itemprop="property" content="tf.keras.datasets.fashion_mnist.load_data"/> <meta itemprop="property" content="tf.keras.layers.Dense"/> <meta itemprop="property" content="tf.keras.layers.Flatten"/> <meta itemprop="property" content="tf.keras.losses.SparseCategoricalCrossentropy"/> <meta itemprop="property" content="tf.keras.preprocessing.image.ImageDataGenerator"/> <meta itemprop="property" content="tf.keras.utils.get_file"/> <meta itemprop="property" 
content="tf.math.not_equal"/> <meta itemprop="property" content="tf.math.reduce_sum"/> <meta itemprop="property" content="tf.py_function"/> <meta itemprop="property" content="tf.random.uniform"/> <meta itemprop="property" content="tf.sparse.SparseTensor"/> <meta itemprop="property" content="tf.stack"/> <meta itemprop="property" content="tf.strings.split"/> <meta itemprop="property" content="tf.strings.substr"/> <meta itemprop="property" content="tf.train.Checkpoint"/> <meta itemprop="property" content="tf.train.CheckpointManager"/> <meta itemprop="property" content="tf.train.Example"/> </div> <table class="tfo-notebook-buttons" align="left"> <tr> <td> <a target="_blank" rel="noopener" href="https://www.tensorflow.org/guide/data"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" alt="">View on TensorFlow.org</a> </td> <td> <a target="_blank" rel="noopener" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" alt="">Run in Google Colab</a> </td> <td> <a target="_blank" rel="noopener" href="https://github.com/tensorflow/docs/blob/master/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" alt="">View source on GitHub</a> </td> <td> <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" alt="">Download notebook</a> </td> </tr> </table> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API enables you to build complex input pipelines from simple, reusable pieces. For example, the pipeline for an image model might aggregate data from files in a distributed file system, apply random perturbations to each image, and merge randomly selected images into a batch for training. 
The pipeline for a text model might involve extracting symbols from raw text data, converting them to embedding identifiers with a lookup table, and batching together sequences of different lengths. The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API makes it possible to handle large amounts of data, read from different data formats, and perform complex transformations.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API introduces a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> abstraction that represents a sequence of elements, in which each element consists of one or more components. For example, in an image pipeline, an element might be a single training example, with a pair of tensor components representing the image and its label.</p> <p>There are two distinct ways to create a dataset:</p> <ul> <li><p>A data <strong>source</strong> constructs a <code translate="no" dir="ltr">Dataset</code> from data stored in memory or in one or more files.</p></li> <li><p>A data <strong>transformation</strong> constructs a dataset from one or more <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> objects.</p></li> </ul> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">import tensorflow as tf </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 2024-08-15 01:37:36.963860: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-08-15 01:37:36.985171: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for 
plugin cuDNN when one has already been registered 2024-08-15 01:37:36.991452: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">import pathlib import os import matplotlib.pyplot as plt import pandas as pd import numpy as np np.set_printoptions(precision=4) </code></pre> <h2 id="basic_mechanics" data-text="Basic mechanics" tabindex="-1">Basic mechanics</h2> <p><a id="basic-mechanics"></a></p> <p>To create an input pipeline, you must start with a data <em>source</em>. For example, to construct a <code translate="no" dir="ltr">Dataset</code> from data in memory, you can use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors"><code translate="no" dir="ltr">tf.data.Dataset.from_tensors()</code></a> or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">tf.data.Dataset.from_tensor_slices()</code></a>. Alternatively, if your input data is stored in a file in the recommended TFRecord format, you can use <a href="https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset"><code translate="no" dir="ltr">tf.data.TFRecordDataset()</code></a>.</p> <p>Once you have a <code translate="no" dir="ltr">Dataset</code> object, you can <em>transform</em> it into a new <code translate="no" dir="ltr">Dataset</code> by chaining method calls on the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> object. 
For example, you can apply per-element transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a>, and multi-element transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a>. Refer to the documentation for <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> for a complete list of transformations.</p> <p>The <code translate="no" dir="ltr">Dataset</code> object is a Python iterable. This makes it possible to consume its elements using a for loop:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1]) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> WARNING: All log messages before absl::InitializeLog() is called are written to STDERR I0000 00:00:1723685859.835217 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.839003 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.842691 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.846561 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.858030 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.861635 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.865105 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.868512 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.871403 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.874859 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.878307 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.881840 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.098140 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.100277 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.102280 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.104281 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.106309 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.108307 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.110218 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.112117 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.114046 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.116014 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.117904 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.119808 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.158075 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.160123 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.162060 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.163993 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.165963 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.167940 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.169863 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.171778 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.173638 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.176135 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.178420 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.180782 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for elem in dataset: print(elem.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 8 3 0 8 2 1 </pre> <p>Or by explicitly creating a Python iterator using <code translate="no" dir="ltr">iter</code> and consuming its elements using <code translate="no" dir="ltr">next</code>:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">it = iter(dataset) print(next(it).numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 8 </pre> <p>Alternatively, dataset elements can be consumed using the <code translate="no" dir="ltr">reduce</code> transformation, which reduces all elements to produce a single result. 
The following example illustrates how to use the <code translate="no" dir="ltr">reduce</code> transformation to compute the sum of a dataset of integers.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">print(dataset.reduce(0, lambda state, value: state + value).numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 22 </pre> <!-- TODO(jsimsa): Talk about <a href="https://www.tensorflow.org/api_docs/python/tf/function"><code translate="no" dir="ltr">tf.function</code></a> support. --> <h3 id="dataset_structure" data-text="Dataset structure" tabindex="-1">Dataset structure</h3> <p>A dataset produces a sequence of <em>elements</em>, where each element has the same (nested) structure of <em>components</em>. Individual components of the structure can be of any type representable by <a href="https://www.tensorflow.org/api_docs/python/tf/TypeSpec"><code translate="no" dir="ltr">tf.TypeSpec</code></a>, including <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor"><code translate="no" dir="ltr">tf.sparse.SparseTensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/RaggedTensor"><code translate="no" dir="ltr">tf.RaggedTensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/TensorArray"><code translate="no" dir="ltr">tf.TensorArray</code></a>, or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>The Python constructs that can be used to express the (nested) structure of elements include <code translate="no" dir="ltr">tuple</code>, <code translate="no" dir="ltr">dict</code>, <code translate="no" dir="ltr">NamedTuple</code>, and <code translate="no" dir="ltr">OrderedDict</code>. 
In particular, <code translate="no" dir="ltr">list</code> is not a valid construct for expressing the structure of dataset elements. This is because early <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> users felt strongly about <code translate="no" dir="ltr">list</code> inputs (for example, when passed to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors"><code translate="no" dir="ltr">tf.data.Dataset.from_tensors</code></a>) being automatically packed as tensors and <code translate="no" dir="ltr">list</code> outputs (for example, return values of user-defined functions) being coerced into a <code translate="no" dir="ltr">tuple</code>. As a consequence, if you would like a <code translate="no" dir="ltr">list</code> input to be treated as a structure, you need to convert it into <code translate="no" dir="ltr">tuple</code> and if you would like a <code translate="no" dir="ltr">list</code> output to be a single component, then you need to explicitly pack it using <a href="https://www.tensorflow.org/api_docs/python/tf/stack"><code translate="no" dir="ltr">tf.stack</code></a>.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#element_spec"><code translate="no" dir="ltr">Dataset.element_spec</code></a> property allows you to inspect the type of each element component. The property returns a <em>nested structure</em> of <a href="https://www.tensorflow.org/api_docs/python/tf/TypeSpec"><code translate="no" dir="ltr">tf.TypeSpec</code></a> objects, matching the structure of the element, which may be a single component, a tuple of components, or a nested tuple of components. 
For example:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10])) dataset1.element_spec </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> TensorSpec(shape=(10,), dtype=tf.float32, name=None) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset2 = tf.data.Dataset.from_tensor_slices( (tf.random.uniform([4]), tf.random.uniform([4, 100], maxval=100, dtype=tf.int32))) dataset2.element_spec </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset3 = tf.data.Dataset.zip((dataset1, dataset2)) dataset3.element_spec </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> (TensorSpec(shape=(10,), dtype=tf.float32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Dataset containing a sparse tensor. 
dataset4 = tf.data.Dataset.from_tensors(tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])) dataset4.element_spec </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> SparseTensorSpec(TensorShape([3, 4]), tf.int32) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Use value_type to see the type of value represented by the element spec dataset4.element_spec.value_type </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> tensorflow.python.framework.sparse_tensor.SparseTensor </pre> <p>The <code translate="no" dir="ltr">Dataset</code> transformations support datasets of any structure. When using the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> transformations, which apply a function to each element, the element structure determines the arguments of the function:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset1 = tf.data.Dataset.from_tensor_slices( tf.random.uniform([4, 10], minval=1, maxval=10, dtype=tf.int32)) dataset1 </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_TensorSliceDataset element_spec=TensorSpec(shape=(10,), dtype=tf.int32, name=None)> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for z in dataset1: print(z.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [3 4 1 6 1 8 5 8 9 4] [2 7 6 9 2 6 6 4 9 7] [8 7 9 6 3 4 5 8 4 4] [2 1 1 1 3 9 7 8 6 8] </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset2 = tf.data.Dataset.from_tensor_slices( (tf.random.uniform([4]), 
tf.random.uniform([4, 100], maxval=100, dtype=tf.int32))) dataset2 </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset3 = tf.data.Dataset.zip((dataset1, dataset2)) dataset3 </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_ZipDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.int32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)))> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for a, (b,c) in dataset3: print('shapes: {a.shape}, {b.shape}, {c.shape}'.format(a=a, b=b, c=c)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> shapes: (10,), (), (100,) shapes: (10,), (), (100,) shapes: (10,), (), (100,) shapes: (10,), (), (100,) </pre> <h2 id="reading_input_data" data-text="Reading input data" tabindex="-1">Reading input data</h2> <h3 id="consuming_numpy_arrays" data-text="Consuming NumPy arrays" tabindex="-1">Consuming NumPy arrays</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/numpy">Loading NumPy arrays</a> tutorial for more examples.</p> <p>If all of your input data fits in memory, the simplest way to create a <code translate="no" dir="ltr">Dataset</code> from them is to convert them to <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects and use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">Dataset.from_tensor_slices</code></a>.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">train, 
test = tf.keras.datasets.fashion_mnist.load_data() </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz 29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz 26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz 5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz 4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">images, labels = train images = images/255 dataset = tf.data.Dataset.from_tensor_slices((images, labels)) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_TensorSliceDataset element_spec=(TensorSpec(shape=(28, 28), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.uint8, name=None))> </pre> <aside class="note"><strong>Note:</strong><span> The above code snippet will embed the <code translate="no" dir="ltr">images</code> and <code translate="no" dir="ltr">labels</code> arrays in your TensorFlow graph as <a href="https://www.tensorflow.org/api_docs/python/tf/constant"><code translate="no" dir="ltr">tf.constant()</code></a> operations. 
This works well for a small dataset, but wastes memory—because the contents of the array will be copied multiple times—and can run into the 2GB limit for the <code translate="no" dir="ltr">tf.GraphDef</code> protocol buffer.</span></aside> <h3 id="consuming_python_generators" data-text="Consuming Python generators" tabindex="-1">Consuming Python generators</h3> <p>Another common data source that can easily be ingested as a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> is the Python generator.</p> <aside class="caution"><strong>Caution:</strong><span> While this is a convenient approach, it has limited portability and scalability. It must run in the same Python process that created the generator, and is still subject to the Python <a href="https://en.wikipedia.org/wiki/Global_interpreter_lock">GIL</a>.</span></aside><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def count(stop): i = 0 while i&lt;stop: yield i i += 1 </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for n in count(5): print(n) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 0 1 2 3 4 </pre> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator"><code translate="no" dir="ltr">Dataset.from_generator</code></a> constructor converts the Python generator to a fully functional <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>The constructor takes a callable as input, not an iterator. This allows it to restart the generator when it reaches the end. 
It takes an optional <code translate="no" dir="ltr">args</code> argument, which is passed as the callable's arguments.</p> <p>The <code translate="no" dir="ltr">output_types</code> argument is required because <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> builds a <a href="https://www.tensorflow.org/api_docs/python/tf/Graph"><code translate="no" dir="ltr">tf.Graph</code></a> internally, and graph edges require a <code translate="no" dir="ltr">tf.dtype</code>.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes = (), ) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for count_batch in ds_counter.repeat().batch(10).take(10): print(count_batch.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 0 1 2 3 4] [ 5 6 7 8 9 10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 0 1 2 3 4] [ 5 6 7 8 9 10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] </pre> <p>The <code translate="no" dir="ltr">output_shapes</code> argument is not <em>required</em> but is highly recommended as many TensorFlow operations do not support tensors with an unknown rank. 
If the length of a particular axis is unknown or variable, set it as <code translate="no" dir="ltr">None</code> in the <code translate="no" dir="ltr">output_shapes</code>.</p> <p>It's also important to note that the <code translate="no" dir="ltr">output_shapes</code> and <code translate="no" dir="ltr">output_types</code> follow the same nesting rules as other dataset methods.</p> <p>Here is an example generator that demonstrates both aspects: it returns tuples of arrays, where the second array is a vector with unknown length.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def gen_series(): i = 0 while True: size = np.random.randint(0, 10) yield i, np.random.normal(size=(size,)) i += 1 </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for i, series in gen_series(): print(i, ":", str(series)) if i > 5: break </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 0 : [1.1274] 1 : [-0.5822 0.8497 -1.3594 0.2083 -0.3007 1.2171 -0.3551] 2 : [-1.2016 -0.1085 0.4088 0.0801 1.4901 -2.3102] 3 : [ 0.5816 -0.6447 -0.9673 0.5282 0.52 -0.2634 0.3001 0.8753] 4 : [ 0.0888 0.071 1.26 -0.347 -0.2643 -1.0757 0.4192] 5 : [ 0.4911 0.8377 0.3576 -0.0351 0.9663] 6 : [-0.1996 0.5808 0.4589 1.8229 -0.5712] </pre> <p>The first output is an <code translate="no" dir="ltr">int32</code>; the second is a <code translate="no" dir="ltr">float32</code>.</p> <p>The first item is a scalar, shape <code translate="no" dir="ltr">()</code>, and the second is a vector of unknown length, shape <code translate="no" dir="ltr">(None,)</code>.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">ds_series = tf.data.Dataset.from_generator( gen_series, output_types=(tf.int32, tf.float32), output_shapes=((), (None,))) ds_series </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_FlatMapDataset 
element_spec=(TensorSpec(shape=(), dtype=tf.int32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))> </pre> <p>Now it can be used like a regular <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>. Note that when batching a dataset with a variable shape, you need to use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a>.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">ds_series_batch = ds_series.shuffle(20).padded_batch(10) ids, sequence_batch = next(iter(ds_series_batch)) print(ids.numpy()) print() print(sequence_batch.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [ 5 19 20 11 4 10 17 8 27 18] [[-0.7479 0.867 -0.0558 -1.0825 -0.4113 0.0312 0. ] [-1.0498 -0.3941 0. 0. 0. 0. 0. ] [-0.2709 0.0236 0.0746 0.3704 0. 0. 0. ] [ 1.6525 -0.861 0.5642 0.9961 0.7463 0. 0. ] [ 0.4122 -0.118 1.5491 1.9578 0. 0. 0. ] [-1.6237 1.3636 -0.2079 0. 0. 0. 0. ] [ 0. 0. 0. 0. 0. 0. 0. ] [ 0. 0. 0. 0. 0. 0. 0. ] [-1.3268 0.9881 0.531 0. 0. 0. 0. 
] [ 0.0284 -1.4974 -0.545 -1.2795 0.7032 1.4058 0.1412]] </pre> <p>For a more realistic example, try wrapping <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator"><code translate="no" dir="ltr">preprocessing.image.ImageDataGenerator</code></a> as a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>First download the data:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">flowers = tf.keras.utils.get_file( 'flower_photos', 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz', untar=True) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz 228813984/228813984 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step </pre> <p>Create the <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator"><code translate="no" dir="ltr">image.ImageDataGenerator</code></a></p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">img_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, rotation_range=20) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">images, labels = next(img_gen.flow_from_directory(flowers)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Found 3670 images belonging to 5 classes. 
</pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">print(images.dtype, images.shape) print(labels.dtype, labels.shape) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> float32 (32, 256, 256, 3) float32 (32, 5) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">ds = tf.data.Dataset.from_generator( lambda: img_gen.flow_from_directory(flowers), output_types=(tf.float32, tf.float32), output_shapes=([32,256,256,3], [32,5]) ) ds.element_spec </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> (TensorSpec(shape=(32, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(32, 5), dtype=tf.float32, name=None)) </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for images, labels in ds.take(1): print('images.shape: ', images.shape) print('labels.shape: ', labels.shape) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Found 3670 images belonging to 5 classes. images.shape: (32, 256, 256, 3) labels.shape: (32, 5) </pre> <h3 id="consuming_tfrecord_data" data-text="Consuming TFRecord data" tabindex="-1">Consuming TFRecord data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/tfrecord">Loading TFRecords</a> tutorial for an end-to-end example.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API supports a variety of file formats so that you can process large datasets that do not fit in memory. For example, the TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data. 
The <a href="https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset"><code translate="no" dir="ltr">tf.data.TFRecordDataset</code></a> class enables you to stream over the contents of one or more TFRecord files as part of an input pipeline.</p> <p>Here is an example using the test file from the French Street Name Signs (FSNS).</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Creates a dataset that reads all of the examples from two files. fsns_test_file = tf.keras.utils.get_file("fsns.tfrec", "https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001") </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001 7904079/7904079 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre> <p>The <code translate="no" dir="ltr">filenames</code> argument to the <code translate="no" dir="ltr">TFRecordDataset</code> initializer can either be a string, a list of strings, or a <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> of strings. Therefore if you have two sets of files for training and validation purposes, you can create a factory method that produces the dataset, taking filenames as an input argument:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.TFRecordDataset(filenames = [fsns_test_file]) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)> </pre> <p>Many TensorFlow projects use serialized <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> records in their TFRecord files. 
These need to be decoded before they can be inspected:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">raw_example = next(iter(dataset)) parsed = tf.train.Example.FromString(raw_example.numpy()) parsed.features.feature['image/text'] </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> bytes_list { value: "Rue Perreyon" } </pre> <h3 id="consuming_text_data" data-text="Consuming text data" tabindex="-1">Consuming text data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/text">Load text</a> tutorial for an end-to-end example.</p> <p>Many datasets are distributed as one or more text files. The <a href="https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset"><code translate="no" dir="ltr">tf.data.TextLineDataset</code></a> provides an easy way to extract lines from one or more text files. Given one or more filenames, a <code translate="no" dir="ltr">TextLineDataset</code> will produce one string-valued element per line of those files.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">directory_url = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/' file_names = ['cowper.txt', 'derby.txt', 'butler.txt'] file_paths = [ tf.keras.utils.get_file(file_name, directory_url + file_name) for file_name in file_names ] </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt 815980/815980 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt 809730/809730 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt 807992/807992 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre> <pre class="prettyprint lang-python" translate="no" 
dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.TextLineDataset(file_paths) </code></pre> <p>Here are the first few lines of the first file:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for line in dataset.take(5): print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;" b'His wrath pernicious, who ten thousand woes' b"Caused to Achaia's host, sent many a soul" b'Illustrious into Ades premature,' b'And Heroes gave (so stood the will of Jove)' </pre> <p>To alternate lines between files use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#interleave"><code translate="no" dir="ltr">Dataset.interleave</code></a>. This makes it easier to shuffle files together. Here are the first, second and third lines from each translation:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">files_ds = tf.data.Dataset.from_tensor_slices(file_paths) lines_ds = files_ds.interleave(tf.data.TextLineDataset, cycle_length=3) for i, line in enumerate(lines_ds.take(9)): if i % 3 == 0: print() print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;" b"\xef\xbb\xbfOf Peleus' son, Achilles, sing, O Muse," b'\xef\xbb\xbfSing, O goddess, the anger of Achilles son of Peleus, that brought' b'His wrath pernicious, who ten thousand woes' b'The vengeance, deep and deadly; whence to Greece' b'countless ills upon the Achaeans. 
Many a brave soul did it send' b"Caused to Achaia's host, sent many a soul" b'Unnumbered ills arose; which many a soul' b'hurrying down to Hades, and many a hero did it yield a prey to dogs and' </pre> <p>By default, a <code translate="no" dir="ltr">TextLineDataset</code> yields <em>every</em> line of each file, which may not be desirable, for example, if the file starts with a header line, or contains comments. These lines can be removed using the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#skip"><code translate="no" dir="ltr">Dataset.skip()</code></a> or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> transformations. Here, you skip the first line, then filter to find only survivors.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_file = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv") titanic_lines = tf.data.TextLineDataset(titanic_file) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv 30874/30874 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for line in titanic_lines.take(10): print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b'survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone' b'0,male,22.0,1,0,7.25,Third,unknown,Southampton,n' b'1,female,38.0,1,0,71.2833,First,C,Cherbourg,n' b'1,female,26.0,0,0,7.925,Third,unknown,Southampton,y' b'1,female,35.0,1,0,53.1,First,C,Southampton,n' b'0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y' b'0,male,2.0,3,1,21.075,Third,unknown,Southampton,n' b'1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n' 
b'1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n' b'1,female,4.0,1,1,16.7,Third,G,Southampton,n' </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def survived(line): return tf.not_equal(tf.strings.substr(line, 0, 1), "0") survivors = titanic_lines.skip(1).filter(survived) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for line in survivors.take(10): print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b'1,female,38.0,1,0,71.2833,First,C,Cherbourg,n' b'1,female,26.0,0,0,7.925,Third,unknown,Southampton,y' b'1,female,35.0,1,0,53.1,First,C,Southampton,n' b'1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n' b'1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n' b'1,female,4.0,1,1,16.7,Third,G,Southampton,n' b'1,male,28.0,0,0,13.0,Second,unknown,Southampton,y' b'1,female,28.0,0,0,7.225,Third,unknown,Cherbourg,y' b'1,male,28.0,0,0,35.5,First,A,Southampton,y' b'1,female,38.0,1,5,31.3875,Third,unknown,Southampton,n' </pre> <h3 id="consuming_csv_data" data-text="Consuming CSV data" tabindex="-1">Consuming CSV data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/csv">Loading CSV Files</a> and <a href="https://www.tensorflow.org/tutorials/load_data/pandas_dataframe">Loading Pandas DataFrames</a> tutorials for more examples.</p> <p>The CSV file format is a popular format for storing tabular data in plain text.</p> <p>For example:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_file = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv") </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">df = pd.read_csv(titanic_file) df.head() </code></pre> <devsite-iframe><iframe 
src="https://www.tensorflow.org/frame/guide/data_a9615c0f2fcea96ba5758f00e5decfd7a95fb2936ed60626ecd0888cf9e08c89.frame" class="framebox inherit-locale " title="Output of df.head(): the first rows of the Titanic DataFrame" allow="clipboard-write https://tensorflow-dot-devsite-v2-prod-3p.appspot.com" allowfullscreen is-upgraded></iframe></devsite-iframe> <p>If your data fits in memory, the same <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">Dataset.from_tensor_slices</code></a> method works on dictionaries, allowing this data to be easily imported:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_slices = tf.data.Dataset.from_tensor_slices(dict(df)) for feature_batch in titanic_slices.take(1): for key, value in feature_batch.items(): print(" {!r:20s}: {}".format(key, value)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 'survived' : 0 'sex' : b'male' 'age' : 22.0 'n_siblings_spouses': 1 'parch' : 0 'fare' : 7.25 'class' : b'Third' 'deck' : b'unknown' 'embark_town' : b'Southampton' 'alone' : b'n' </pre> <p>A more scalable approach is to load from disk as necessary.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> module provides methods to extract records from one or more CSV files that comply with <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset"><code translate="no" dir="ltr">tf.data.experimental.make_csv_dataset</code></a> function is the high-level interface for reading sets of CSV files. 
It supports column type inference and many other features, like batching and shuffling, to make usage simple.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_batches = tf.data.experimental.make_csv_dataset( titanic_file, batch_size=4, label_name="survived") </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for feature_batch, label_batch in titanic_batches.take(1): print("'survived': {}".format(label_batch)) print("features:") for key, value in feature_batch.items(): print(" {!r:20s}: {}".format(key, value)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 'survived': [0 0 0 0] features: 'sex' : [b'male' b'male' b'male' b'male'] 'age' : [28. 46. 28. 26.] 'n_siblings_spouses': [0 1 0 0] 'parch' : [1 0 0 0] 'fare' : [33. 61.175 8.05 7.8875] 'class' : [b'Second' b'First' b'Third' b'Third'] 'deck' : [b'unknown' b'E' b'unknown' b'unknown'] 'embark_town' : [b'Southampton' b'Southampton' b'Southampton' b'Southampton'] 'alone' : [b'n' b'n' b'y' b'y'] </pre> <p>You can use the <code translate="no" dir="ltr">select_columns</code> argument if you only need a subset of columns.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_batches = tf.data.experimental.make_csv_dataset( titanic_file, batch_size=4, label_name="survived", select_columns=['class', 'fare', 'survived']) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for feature_batch, label_batch in titanic_batches.take(1): print("'survived': {}".format(label_batch)) for key, value in feature_batch.items(): print(" {!r:20s}: {}".format(key, value)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 'survived': [1 1 0 0] 'fare' : [10.5 35.5 12.875 29.125] 'class' : [b'Second' b'First' b'Second' b'Third'] </pre> <p>There is also a lower-level 
<a href="https://www.tensorflow.org/api_docs/python/tf/data/experimental/CsvDataset"><code translate="no" dir="ltr">experimental.CsvDataset</code></a> class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string] dataset = tf.data.experimental.CsvDataset(titanic_file, titanic_types , header=True) for line in dataset.take(10): print([item.numpy() for item in line]) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0, b'male', 22.0, 1, 0, 7.25, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 38.0, 1, 0, 71.2833, b'First', b'C', b'Cherbourg', b'n'] [1, b'female', 26.0, 0, 0, 7.925, b'Third', b'unknown', b'Southampton', b'y'] [1, b'female', 35.0, 1, 0, 53.1, b'First', b'C', b'Southampton', b'n'] [0, b'male', 28.0, 0, 0, 8.4583, b'Third', b'unknown', b'Queenstown', b'y'] [0, b'male', 2.0, 3, 1, 21.075, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 27.0, 0, 2, 11.1333, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 14.0, 1, 0, 30.0708, b'Second', b'unknown', b'Cherbourg', b'n'] [1, b'female', 4.0, 1, 1, 16.7, b'Third', b'G', b'Southampton', b'n'] [0, b'male', 20.0, 0, 0, 8.05, b'Third', b'unknown', b'Southampton', b'y'] </pre> <p>If some columns are empty, this low-level interface allows you to provide default values instead of column types.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">%%writefile missing.csv 1,2,3,4 ,2,3,4 1,,3,4 1,2,,4 1,2,3, ,,, </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Writing missing.csv </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Creates a 
dataset that reads all of the records from one CSV file with # four integer columns which may have missing values. record_defaults = [999,999,999,999] dataset = tf.data.experimental.CsvDataset("missing.csv", record_defaults) dataset = dataset.map(lambda *items: tf.stack(items)) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> &lt;_MapDataset element_spec=TensorSpec(shape=(4,), dtype=tf.int32, name=None)&gt; </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for line in dataset: print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [1 2 3 4] [999 2 3 4] [ 1 999 3 4] [ 1 2 999 4] [ 1 2 3 999] [999 999 999 999] </pre> <p>By default, a <code translate="no" dir="ltr">CsvDataset</code> yields <em>every</em> column of <em>every</em> line of the file, which may not be desirable, for example if the file starts with a header line that should be ignored, or if some columns are not required in the input. These lines and fields can be removed with the <code translate="no" dir="ltr">header</code> and <code translate="no" dir="ltr">select_cols</code> arguments respectively.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Creates a dataset that reads all of the records from one CSV file, # extracting integer data from columns 2 and 4. 
record_defaults = [999, 999] # Only provide defaults for the selected columns dataset = tf.data.experimental.CsvDataset("missing.csv", record_defaults, select_cols=[1, 3]) dataset = dataset.map(lambda *items: tf.stack(items)) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_MapDataset element_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None)> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for line in dataset: print(line.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [2 4] [2 4] [999 4] [2 4] [ 2 999] [999 999] </pre> <h3 id="consuming_sets_of_files" data-text="Consuming sets of files" tabindex="-1">Consuming sets of files</h3> <p>There are many datasets distributed as a set of files, where each file is an example.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">flowers_root = tf.keras.utils.get_file( 'flower_photos', 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz', untar=True) flowers_root = pathlib.Path(flowers_root) </code></pre><aside class="note"><strong>Note:</strong><span> these images are licensed CC-BY, see LICENSE.txt for details.</span></aside> <p>The root directory contains a directory for each class:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for item in flowers_root.glob("*"): print(item.name) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> daisy tulips sunflowers LICENSE.txt dandelion roses </pre> <p>The files in each class directory are examples:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">list_ds = tf.data.Dataset.list_files(str(flowers_root/'*/*')) for f in list_ds.take(5): print(f.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 
b'/home/kbuilder/.keras/datasets/flower_photos/tulips/4955884820_7e4ce4d7e5_m.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/dandelion/6250363717_17732e992e_n.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/tulips/14278331403_4c475f9a9b.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/dandelion/480621885_4c8b50fa11_m.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/tulips/5716293002_a8be6a6dd3_n.jpg' </pre> <p>Read the data using the <a href="https://www.tensorflow.org/api_docs/python/tf/io/read_file"><code translate="no" dir="ltr">tf.io.read_file</code></a> function and extract the label from the path, returning <code translate="no" dir="ltr">(image, label)</code> pairs:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def process_path(file_path): label = tf.strings.split(file_path, os.sep)[-2] return tf.io.read_file(file_path), label labeled_ds = list_ds.map(process_path) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for image_raw, label_text in labeled_ds.take(1): print(repr(image_raw.numpy()[:100])) print() print(label_text.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x03\x02\x02\x03\x02\x02\x03\x03\x03\x03\x04\x03\x03\x04\x05\x08\x05\x05\x04\x04\x05\n\x07\x07\x06\x08\x0c\n\x0c\x0c\x0b\n\x0b\x0b\r\x0e\x12\x10\r\x0e\x11\x0e\x0b\x0b\x10\x16\x10\x11\x13\x14\x15\x15\x15\x0c\x0f\x17\x18\x16\x14\x18\x12\x14\x15\x14\xff\xdb\x00C\x01\x03\x04\x04\x05\x04\x05' b'dandelion' </pre> <!-- TODO(mrry): Add this section. 
### Handling text data with unusual sizes --> <h2 id="batching_dataset_elements" data-text="Batching dataset elements" tabindex="-1">Batching dataset elements</h2> <h3 id="simple_batching" data-text="Simple batching" tabindex="-1">Simple batching</h3> <p>The simplest form of batching stacks <code translate="no" dir="ltr">n</code> consecutive elements of a dataset into a single element. The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch()</code></a> transformation does exactly this, with the same constraints as the <a href="https://www.tensorflow.org/api_docs/python/tf/stack"><code translate="no" dir="ltr">tf.stack()</code></a> operator, applied to each component of the elements: i.e., for each component <em>i</em>, all elements must have a tensor of the exact same shape.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">inc_dataset = tf.data.Dataset.range(100) dec_dataset = tf.data.Dataset.range(0, -100, -1) dataset = tf.data.Dataset.zip((inc_dataset, dec_dataset)) batched_dataset = dataset.batch(4) for batch in batched_dataset.take(4): print([arr.numpy() for arr in batch]) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [array([0, 1, 2, 3]), array([ 0, -1, -2, -3])] [array([4, 5, 6, 7]), array([-4, -5, -6, -7])] [array([ 8, 9, 10, 11]), array([ -8, -9, -10, -11])] [array([12, 13, 14, 15]), array([-12, -13, -14, -15])] </pre> <p>While <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> tries to propagate shape information, the default settings of <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> result in an unknown batch size because the last batch may not be full. 
Note the <code translate="no" dir="ltr">None</code>s in the shape:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">batched_dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_BatchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))> </pre> <p>Use the <code translate="no" dir="ltr">drop_remainder</code> argument to ignore that last batch, and get full shape propagation:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">batched_dataset = dataset.batch(7, drop_remainder=True) batched_dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_BatchDataset element_spec=(TensorSpec(shape=(7,), dtype=tf.int64, name=None), TensorSpec(shape=(7,), dtype=tf.int64, name=None))> </pre> <h3 id="batching_tensors_with_padding" data-text="Batching tensors with padding" tabindex="-1">Batching tensors with padding</h3> <p>The above recipe works for tensors that all have the same size. However, many models (including sequence models) work with input data that can have varying size (for example, sequences of different lengths). 
To handle this case, the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a> transformation enables you to batch tensors of different shapes by specifying one or more dimensions in which they may be padded.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.Dataset.range(100) dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x)) dataset = dataset.padded_batch(4, padded_shapes=(None,)) for batch in dataset.take(2): print(batch.numpy()) print() </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [[0 0 0] [1 0 0] [2 2 0] [3 3 3]] [[4 4 4 4 0 0 0] [5 5 5 5 5 0 0] [6 6 6 6 6 6 0] [7 7 7 7 7 7 7]] </pre> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a> transformation allows you to set different padding for each dimension of each component, and it may be variable-length (signified by <code translate="no" dir="ltr">None</code> in the example above) or constant-length. It is also possible to override the padding value, which defaults to 0.</p> <!-- TODO(mrry): Add this section. ### Dense ragged -> tf.SparseTensor --> <h2 id="training_workflows" data-text="Training workflows" tabindex="-1">Training workflows</h2> <h3 id="processing_multiple_epochs" data-text="Processing multiple epochs" tabindex="-1">Processing multiple epochs</h3> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API offers two main ways to process multiple epochs of the same data.</p> <p>The simplest way to iterate over a dataset in multiple epochs is to use the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat()</code></a> transformation. 
First, create a dataset of Titanic data:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_file = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv") titanic_lines = tf.data.TextLineDataset(titanic_file) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def plot_batch_sizes(ds): batch_sizes = [batch.shape[0] for batch in ds] plt.bar(range(len(batch_sizes)), batch_sizes) plt.xlabel('Batch number') plt.ylabel('Batch size') </code></pre> <p>Applying the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat()</code></a> transformation with no arguments will repeat the input indefinitely.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> transformation concatenates the repetitions of its input without signaling the end of one epoch and the beginning of the next epoch. 
Because of this, a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> applied after <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> will yield batches that straddle epoch boundaries:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_batches = titanic_lines.repeat(3).batch(128) plot_batch_sizes(titanic_batches) </code></pre> <p><img src="/static/guide/data_files/output_nZ0G1cztoSHX_0.png" alt="Bar chart of batch size by batch number for repeat(3).batch(128)"></p> <p>If you need clear epoch separation, put <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> before the repeat:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">titanic_batches = titanic_lines.batch(128).repeat(3) plot_batch_sizes(titanic_batches) </code></pre> <p><img src="/static/guide/data_files/output_wmbmdK1qoSHS_0.png" alt="Bar chart of batch size by batch number for batch(128).repeat(3)"></p> <p>If you would like to perform a custom computation (for example, to collect statistics) at the end of each epoch, then it's simplest to restart the dataset iteration on each epoch:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">epochs = 3 dataset = titanic_lines.batch(128) for epoch in range(epochs): for batch in dataset: print(batch.shape) print("End of epoch: ", epoch) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> (128,) (128,) (128,) (128,) (116,) End of epoch: 0 (128,) (128,) (128,) (128,) (116,) End of epoch: 1 (128,) (128,) (128,) (128,) (116,) End of epoch: 2 </pre> <h3 id="randomly_shuffling_input_data" data-text="Randomly shuffling input data" tabindex="-1">Randomly shuffling input data</h3> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" 
dir="ltr">Dataset.shuffle()</code></a> transformation maintains a fixed-size buffer and chooses the next element uniformly at random from that buffer.</p> <aside class="note"><strong>Note:</strong><span> While large buffer_sizes shuffle more thoroughly, they can take a lot of memory, and significant time to fill. Consider using <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#interleave"><code translate="no" dir="ltr">Dataset.interleave</code></a> across files if this becomes a problem.</span></aside> <p>Add an index to the dataset so you can see the effect:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">lines = tf.data.TextLineDataset(titanic_file) counter = tf.data.experimental.Counter() dataset = tf.data.Dataset.zip((counter, lines)) dataset = dataset.shuffle(buffer_size=100) dataset = dataset.batch(20) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> WARNING:tensorflow:From /tmpfs/tmp/ipykernel_44933/4092668703.py:2: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.data.Dataset.counter(...)` instead. 
<_BatchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.string, name=None))> </pre> <p>Since the <code translate="no" dir="ltr">buffer_size</code> is 100, and the batch size is 20, the first batch contains no elements with an index over 120.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">n,line_batch = next(iter(dataset)) print(n.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [ 99 18 1 29 66 88 47 30 80 46 68 44 35 40 33 95 108 105 38 113] </pre> <p>As with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> the order relative to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> matters.</p> <p><a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" dir="ltr">Dataset.shuffle</code></a> doesn't signal the end of an epoch until the shuffle buffer is empty. 
So a shuffle placed before a repeat will show every element of one epoch before moving to the next:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.Dataset.zip((counter, lines)) shuffled = dataset.shuffle(buffer_size=100).batch(10).repeat(2) print("Here are the item ID's near the epoch boundary:\n") for n, line_batch in shuffled.skip(60).take(5): print(n.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Here are the item ID's near the epoch boundary: [469 414 497 584 615 612 625 627 603 621] [582 553 343 602 626 567 486 593 616 525] [557 576 478 533 591 398 484 431] [66 43 51 18 94 3 76 52 90 57] [ 0 101 71 86 56 17 33 70 110 75] </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">shuffle_repeat = [n.numpy().mean() for n, line_batch in shuffled] plt.plot(shuffle_repeat, label="shuffle().repeat()") plt.ylabel("Mean item ID") plt.legend() </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> &lt;matplotlib.legend.Legend at 0x7f373c471af0&gt; </pre> <p><img src="/static/guide/data_files/output_H9hlE-lGoSGz_1.png" alt="Line plot of mean item ID per batch for shuffle().repeat()"></p> <p>But a repeat before a shuffle mixes the epoch boundaries together:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dataset = tf.data.Dataset.zip((counter, lines)) shuffled = dataset.repeat(2).shuffle(buffer_size=100).batch(10) print("Here are the item ID's near the epoch boundary:\n") for n, line_batch in shuffled.skip(55).take(15): print(n.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Here are the item ID's near the epoch boundary: [583 415 1 542 563 9 620 622 551 548] [589 592 365 571 33 557 618 31 541 27] [537 24 615 43 18 550 11 8 39 369] [601 38 485 20 627 46 22 23 322 608] [626 590 491 63 29 564 17 19 617 66] [508 580 72 45 57 54 556 62 14 511] [623 73 75 79 599 372 
21 83 547 26] [486 4 0 573 74 49 0 53 95 34] [ 60 605 15 90 99 549 16 50 91 80] [106 108 112 297 561 44 52 82 86 71] [581 77 117 28 567 10 30 3 81 89] [587 32 102 7 135 51 113 110 114 451] [ 59 64 68 116 76 306 367 128 552 136] [111 569 522 5 67 616 154 131 512 37] [539 103 142 78 85 2 87 12 149 137] </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">repeat_shuffle = [n.numpy().mean() for n, line_batch in shuffled] plt.plot(shuffle_repeat, label="shuffle().repeat()") plt.plot(repeat_shuffle, label="repeat().shuffle()") plt.ylabel("Mean item ID") plt.legend() </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> &lt;matplotlib.legend.Legend at 0x7f373c462b20&gt; </pre> <p><img src="/static/guide/data_files/output_VAM4cbpZoSGL_1.png" alt="Line plot comparing mean item ID per batch for shuffle().repeat() and repeat().shuffle()"></p> <h2 id="preprocessing_data" data-text="Preprocessing data" tabindex="-1">Preprocessing data</h2> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map(f)</code></a> transformation produces a new dataset by applying a given function <code translate="no" dir="ltr">f</code> to each element of the input dataset. It is based on the <a href="https://en.wikipedia.org/wiki/Map_(higher-order_function)"><code translate="no" dir="ltr">map()</code></a> function that is commonly applied to lists (and other structures) in functional programming languages. The function <code translate="no" dir="ltr">f</code> takes the <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects that represent a single element in the input, and returns the <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects that will represent a single element in the new dataset. 
Its implementation uses standard TensorFlow operations to transform one element into another.</p> <p>This section covers common examples of how to use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map()</code></a>.</p> <h3 id="decoding_image_data_and_resizing_it" data-text="Decoding image data and resizing it" tabindex="-1">Decoding image data and resizing it</h3> <!-- TODO(markdaoust): link to image augmentation when it exists --> <p>When training a neural network on real-world image data, it is often necessary to convert images of different sizes to a common size, so that they may be batched into a fixed size.</p> <p>Rebuild the flower filenames dataset:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">list_ds = tf.data.Dataset.list_files(str(flowers_root/'*/*')) </code></pre> <p>Write a function that manipulates the dataset elements.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr"># Reads an image from a file, decodes it into a dense tensor, and resizes it # to a fixed shape. 
def parse_image(filename): parts = tf.strings.split(filename, os.sep) label = parts[-2] image = tf.io.read_file(filename) image = tf.io.decode_jpeg(image) image = tf.image.convert_image_dtype(image, tf.float32) image = tf.image.resize(image, [128, 128]) return image, label </code></pre> <p>Test that it works.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">file_path = next(iter(list_ds)) image, label = parse_image(file_path) def show(image, label): plt.figure() plt.imshow(image) plt.title(label.numpy().decode('utf-8')) plt.axis('off') show(image, label) </code></pre> <p><img src="/static/guide/data_files/output_y8xuN_HBzGup_0.png" alt="png"></p> <p>Map it over the dataset.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">images_ds = list_ds.map(parse_image) for image, label in images_ds.take(2): show(image, label) </code></pre> <p><img src="/static/guide/data_files/output_SzO8LI_H5Sk__0.png" alt="png"></p> <p><img src="/static/guide/data_files/output_SzO8LI_H5Sk__1.png" alt="png"></p> <h3 id="applying_arbitrary_python_logic" data-text="Applying arbitrary Python logic" tabindex="-1">Applying arbitrary Python logic</h3> <p>For performance reasons, use TensorFlow operations for preprocessing your data whenever possible. However, it is sometimes useful to call external Python libraries when parsing your input data. 
You can use the <a href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a> operation in a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a> transformation.</p> <p>For example, if you want to apply a random rotation, the <a href="https://www.tensorflow.org/api_docs/python/tf/image"><code translate="no" dir="ltr">tf.image</code></a> module only has <a href="https://www.tensorflow.org/api_docs/python/tf/image/rot90"><code translate="no" dir="ltr">tf.image.rot90</code></a>, which is not very useful for image augmentation.</p> <aside class="note"><strong>Note:</strong><span> <code translate="no" dir="ltr">tensorflow_addons</code> has a TensorFlow compatible <code translate="no" dir="ltr">rotate</code> in <code translate="no" dir="ltr">tensorflow_addons.image.rotate</code>.</span></aside> <p>To demonstrate <a href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a>, try using the <code translate="no" dir="ltr">scipy.ndimage.rotate</code> function instead:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">import scipy.ndimage as ndimage @tf.py_function(Tout=tf.float32) def random_rotate_image(image): image = ndimage.rotate(image, np.random.uniform(-30, 30), reshape=False) return image </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">image, label = next(iter(images_ds)) image = random_rotate_image(image) show(image, label) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.07214577..1.0803627]. 
</pre> <p><img src="/static/guide/data_files/output__wEyL7bS9S6t_1.png" alt="Flower image after a random rotation, titled with its label"></p> <p>To use this function with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a>, the same caveats apply as with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator"><code translate="no" dir="ltr">Dataset.from_generator</code></a>. You need to describe the return shapes and types when you apply the function:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def tf_random_rotate_image(image, label): im_shape = image.shape image = random_rotate_image(image) image.set_shape(im_shape) return image, label </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">rot_ds = images_ds.map(tf_random_rotate_image) for image, label in rot_ds.take(2): show(image, label) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.014158356..1.0156134]. Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.067302234..1.1018459]. </pre> <p><img src="/static/guide/data_files/output_bWPqKbTnbDct_1.png" alt="First randomly rotated flower image from rot_ds, titled with its label"></p> <p><img src="/static/guide/data_files/output_bWPqKbTnbDct_2.png" alt="Second randomly rotated flower image from rot_ds, titled with its label"></p> <h3 id="parsing_tfexample_protocol_buffer_messages" data-text="Parsing tf.Example protocol buffer messages" tabindex="-1">Parsing <code translate="no" dir="ltr">tf.Example</code> protocol buffer messages</h3> <p>Many input pipelines extract <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> protocol buffer messages from a TFRecord format. 
Each <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> record contains one or more "features", and the input pipeline typically converts these features into tensors.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">fsns_test_file = tf.keras.utils.get_file("fsns.tfrec", "https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001") dataset = tf.data.TFRecordDataset(filenames = [fsns_test_file]) dataset </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> &lt;TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)&gt; </pre> <p>You can work with <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> protos outside of a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> to understand the data:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">raw_example = next(iter(dataset)) parsed = tf.train.Example.FromString(raw_example.numpy()) feature = parsed.features.feature raw_img = feature['image/encoded'].bytes_list.value[0] img = tf.image.decode_png(raw_img) plt.imshow(img) plt.axis('off') _ = plt.title(feature["image/text"].bytes_list.value[0]) </code></pre> <p><img src="/static/guide/data_files/output_4znsVNqnF73C_0.png" alt="Decoded image from the first record, titled with its image/text feature"></p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">raw_example = next(iter(dataset)) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def tf_parse(eg): example = tf.io.parse_example( eg[tf.newaxis], { 'image/encoded': tf.io.FixedLenFeature(shape=(), dtype=tf.string), 'image/text': tf.io.FixedLenFeature(shape=(), dtype=tf.string) }) 
return example['image/encoded'][0], example['image/text'][0] </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">img, txt = tf_parse(raw_example) print(txt.numpy()) print(repr(img.numpy()[:20]), "...") </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> b'Rue Perreyon' b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X' ... </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">decoded = dataset.map(tf_parse) decoded </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> <_MapDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.string, name=None))> </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">image_batch, text_batch = next(iter(decoded.batch(10))) image_batch.shape </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> TensorShape([10]) </pre> <p><a id="time_series_windowing"></a></p> <h3 id="time_series_windowing" data-text="Time series windowing" tabindex="-1">Time series windowing</h3> <p>For an end-to-end time series example see: <a href="https://www.tensorflow.org/tutorials/structured_data/time_series">Time series forecasting</a>.</p> <p>Time series data is often organized with the time axis intact.</p> <p>Use a simple <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#range"><code translate="no" dir="ltr">Dataset.range</code></a> to demonstrate:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">range_ds = tf.data.Dataset.range(100000) </code></pre> <p>Typically, models based on this sort of data will want a contiguous time slice.</p> <p>The simplest approach would be to batch the data:</p> <h4 id="using_batch" data-text="Using batch" tabindex="-1">Using <code translate="no" dir="ltr">batch</code></h4> <pre 
class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">batches = range_ds.batch(10, drop_remainder=True) for batch in batches.take(5): print(batch.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 25 26 27 28 29] [30 31 32 33 34 35 36 37 38 39] [40 41 42 43 44 45 46 47 48 49] </pre> <p>Or to make dense predictions one step into the future, you might shift the features and labels by one step relative to each other:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def dense_1_step(batch): # Shift features and labels one step relative to each other. return batch[:-1], batch[1:] predict_dense_1_step = batches.map(dense_1_step) for features, label in predict_dense_1_step.take(3): print(features.numpy(), " => ", label.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4 5 6 7 8] => [1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18] => [11 12 13 14 15 16 17 18 19] [20 21 22 23 24 25 26 27 28] => [21 22 23 24 25 26 27 28 29] </pre> <p>To predict a whole window instead of a fixed offset you can split the batches into two parts:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">batches = range_ds.batch(15, drop_remainder=True) def label_next_5_steps(batch): return (batch[:-5], # Inputs: All except the last 5 steps batch[-5:]) # Labels: The last 5 steps predict_5_steps = batches.map(label_next_5_steps) for features, label in predict_5_steps.take(3): print(features.numpy(), " => ", label.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4 5 6 7 8 9] => [10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] => [25 26 27 28 29] [30 31 32 33 34 35 36 37 38 39] => [40 41 42 43 44] </pre> <p>To allow some overlap between the features of one batch and the 
labels of another, use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#zip"><code translate="no" dir="ltr">Dataset.zip</code></a>:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">feature_length = 10 label_length = 3 features = range_ds.batch(feature_length, drop_remainder=True) labels = range_ds.batch(feature_length).skip(1).map(lambda labels: labels[:label_length]) predicted_steps = tf.data.Dataset.zip((features, labels)) for features, label in predicted_steps.take(5): print(features.numpy(), " => ", label.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4 5 6 7 8 9] => [10 11 12] [10 11 12 13 14 15 16 17 18 19] => [20 21 22] [20 21 22 23 24 25 26 27 28 29] => [30 31 32] [30 31 32 33 34 35 36 37 38 39] => [40 41 42] [40 41 42 43 44 45 46 47 48 49] => [50 51 52] </pre> <h4 id="using_window" data-text="Using window" tabindex="-1">Using <code translate="no" dir="ltr">window</code></h4> <p>While using <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> works, there are situations where you may need finer control. The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window"><code translate="no" dir="ltr">Dataset.window</code></a> method gives you complete control, but requires some care: it returns a <code translate="no" dir="ltr">Dataset</code> of <code translate="no" dir="ltr">Datasets</code>. 
Go to the <a href="#dataset_structure">Dataset structure</a> section for details.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">window_size = 5 windows = range_ds.window(window_size, shift=1) for sub_ds in windows.take(5): print(sub_ds) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> &lt;_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)&gt; &lt;_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)&gt; &lt;_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)&gt; &lt;_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)&gt; &lt;_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)&gt; </pre> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#flat_map"><code translate="no" dir="ltr">Dataset.flat_map</code></a> method can take a dataset of datasets and flatten it into a single dataset:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for x in windows.flat_map(lambda x: x).take(30): print(x.numpy(), end=' ') </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 0 1 2 3 4 1 2 3 4 5 2 3 4 5 6 3 4 5 6 7 4 5 6 7 8 5 6 7 8 9 </pre> <p>In nearly all cases, you will want to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> the dataset first:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def sub_to_batch(sub): return sub.batch(window_size, drop_remainder=True) for example in windows.flat_map(sub_to_batch).take(5): print(example.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0 1 2 3 4] [1 2 3 4 5] [2 3 4 5 6] [3 4 5 6 7] [4 5 6 7 8] </pre> <p>Now, you can see that the <code translate="no" dir="ltr">shift</code> argument controls how
much each window moves over.</p> <p>Putting this together you might write this function:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def make_window_dataset(ds, window_size=5, shift=1, stride=1): windows = ds.window(window_size, shift=shift, stride=stride) def sub_to_batch(sub): return sub.batch(window_size, drop_remainder=True) windows = windows.flat_map(sub_to_batch) return windows </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">ds = make_window_dataset(range_ds, window_size=10, shift = 5, stride=3) for example in ds.take(10): print(example.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [ 0 3 6 9 12 15 18 21 24 27] [ 5 8 11 14 17 20 23 26 29 32] [10 13 16 19 22 25 28 31 34 37] [15 18 21 24 27 30 33 36 39 42] [20 23 26 29 32 35 38 41 44 47] [25 28 31 34 37 40 43 46 49 52] [30 33 36 39 42 45 48 51 54 57] [35 38 41 44 47 50 53 56 59 62] [40 43 46 49 52 55 58 61 64 67] [45 48 51 54 57 60 63 66 69 72] </pre> <p>Then it's easy to extract labels, as before:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">dense_labels_ds = ds.map(dense_1_step) for inputs,labels in dense_labels_ds.take(3): print(inputs.numpy(), "=>", labels.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [ 0 3 6 9 12 15 18 21 24] => [ 3 6 9 12 15 18 21 24 27] [ 5 8 11 14 17 20 23 26 29] => [ 8 11 14 17 20 23 26 29 32] [10 13 16 19 22 25 28 31 34] => [13 16 19 22 25 28 31 34 37] </pre> <h3 id="resampling" data-text="Resampling" tabindex="-1">Resampling</h3> <p>When working with a dataset that is very class-imbalanced, you may want to resample the dataset. <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> provides two methods to do this. 
The credit card fraud dataset is a good example of this sort of problem.</p> <aside class="note"><strong>Note:</strong><span> Go to <a href="https://www.tensorflow.org/tutorials/structured_data/imbalanced_data">Classification on imbalanced data</a> for a full tutorial.</span></aside><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">zip_path = tf.keras.utils.get_file( origin='https://storage.googleapis.com/download.tensorflow.org/data/creditcard.zip', fname='creditcard.zip', extract=True) csv_path = zip_path.replace('.zip', '.csv') </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/creditcard.zip 69155632/69155632 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step </pre> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">creditcard_ds = tf.data.experimental.make_csv_dataset( csv_path, batch_size=1024, label_name="Class", # Set the column types: 30 floats and an int. 
column_defaults=[float()]*30+[int()]) </code></pre> <p>Now, check the distribution of classes; it is highly skewed:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def count(counts, batch): features, labels = batch class_1 = labels == 1 class_1 = tf.cast(class_1, tf.int32) class_0 = labels == 0 class_0 = tf.cast(class_0, tf.int32) counts['class_0'] += tf.reduce_sum(class_0) counts['class_1'] += tf.reduce_sum(class_1) return counts </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">counts = creditcard_ds.take(10).reduce( initial_state={'class_0': 0, 'class_1': 0}, reduce_func = count) counts = np.array([counts['class_0'].numpy(), counts['class_1'].numpy()]).astype(np.float32) fractions = counts/counts.sum() print(fractions) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0.996 0.004] </pre> <p>A common approach to training with an imbalanced dataset is to balance it. <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> includes a few methods which enable this workflow:</p> <h4 id="datasets_sampling" data-text="Datasets sampling" tabindex="-1">Datasets sampling</h4> <p>One approach to resampling a dataset is to use <code translate="no" dir="ltr">sample_from_datasets</code>.
This is more applicable when you have a separate <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> for each class.</p> <p>Here, just use filter to generate them from the credit card fraud data:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">negative_ds = ( creditcard_ds .unbatch() .filter(lambda features, label: label==0) .repeat()) positive_ds = ( creditcard_ds .unbatch() .filter(lambda features, label: label==1) .repeat()) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for features, label in positive_ds.batch(10).take(1): print(label.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [1 1 1 1 1 1 1 1 1 1] </pre> <p>To use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#sample_from_datasets"><code translate="no" dir="ltr">tf.data.Dataset.sample_from_datasets</code></a> pass the datasets, and the weight for each:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">balanced_ds = tf.data.Dataset.sample_from_datasets( [negative_ds, positive_ds], [0.5, 0.5]).batch(10) </code></pre> <p>Now the dataset produces examples of each class with a 50/50 probability:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for features, labels in balanced_ds.take(10): print(labels.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [1 0 1 0 0 1 0 1 1 0] [1 0 0 0 0 0 0 0 1 1] [0 0 1 0 0 1 0 0 1 0] [0 1 1 0 1 0 0 1 1 0] [0 1 1 0 0 0 1 1 1 1] [1 1 1 1 1 1 0 0 0 0] [0 1 1 0 1 0 0 1 1 1] [1 1 0 0 0 0 0 1 0 1] [1 1 0 1 1 1 1 0 0 1] [0 1 1 0 0 1 0 0 0 0] </pre> <h4 id="rejection_resampling" data-text="Rejection resampling" tabindex="-1">Rejection resampling</h4> <p>One problem with the above <a 
href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#sample_from_datasets"><code translate="no" dir="ltr">Dataset.sample_from_datasets</code></a> approach is that it needs a separate <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> per class. You could use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> to create those two datasets, but that results in all the data being loaded twice.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#rejection_resample"><code translate="no" dir="ltr">tf.data.Dataset.rejection_resample</code></a> method can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped or repeated to achieve balance.</p> <p>The <code translate="no" dir="ltr">rejection_resample</code> method takes a <code translate="no" dir="ltr">class_func</code> argument. This <code translate="no" dir="ltr">class_func</code> is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.</p> <p>The goal here is to balance the label distribution, and the elements of <code translate="no" dir="ltr">creditcard_ds</code> are already <code translate="no" dir="ltr">(features, label)</code> pairs. 
So the <code translate="no" dir="ltr">class_func</code> just needs to return those labels:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">def class_func(features, label): return label </code></pre> <p>The resampling method deals with individual examples, so in this case you must <code translate="no" dir="ltr">unbatch</code> the dataset before applying that method.</p> <p>The method needs a target distribution, and optionally an initial distribution estimate as inputs.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">resample_ds = ( creditcard_ds .unbatch() .rejection_resample(class_func, target_dist=[0.5,0.5], initial_dist=fractions) .batch(10)) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py:4968: Print (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2018-08-20. Instructions for updating: Use tf.print instead of tf.Print. Note that tf.print returns a no-output operator that directly prints the output. Outside of defuns or eager mode, this operator will not be executed unless it is directly specified in session.run or used as a control dependency for other operators. This is only a concern in graph mode. Below is an example of how to ensure tf.print executes in graph mode: </pre> <p>The <code translate="no" dir="ltr">rejection_resample</code> method returns <code translate="no" dir="ltr">(class, example)</code> pairs where the <code translate="no" dir="ltr">class</code> is the output of the <code translate="no" dir="ltr">class_func</code>. 
In this case, the <code translate="no" dir="ltr">example</code> was already a <code translate="no" dir="ltr">(feature, label)</code> pair, so use <code translate="no" dir="ltr">map</code> to drop the extra copy of the labels:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">balanced_ds = resample_ds.map(lambda extra_label, features_and_label: features_and_label) </code></pre> <p>Now the dataset produces examples of each class with a 50/50 probability:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">for features, labels in balanced_ds.take(10): print(labels.numpy()) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] [1 0 1 0 1 0 1 0 1 1] [1 0 1 1 1 1 0 0 1 0] [1 0 1 1 0 1 0 0 0 1] [0 1 0 0 0 0 1 1 1 1] [1 0 0 0 1 1 1 0 1 0] [0 0 0 1 0 0 1 0 1 1] [0 1 0 0 0 0 1 0 1 0] [1 0 0 0 0 1 0 0 0 1] [0 0 0 0 1 1 1 1 1 0] [1 1 0 1 1 1 1 1 1 0] </pre> <h2 id="iterator_checkpointing" 
data-text="Iterator Checkpointing" tabindex="-1">Iterator Checkpointing</h2> <p>TensorFlow supports <a href="https://www.tensorflow.org/guide/checkpoint">taking checkpoints</a> so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note, however, that iterator checkpoints may be large, since transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" dir="ltr">Dataset.shuffle</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch"><code translate="no" dir="ltr">Dataset.prefetch</code></a> require buffering elements within the iterator.</p> <p>To include your iterator in a checkpoint, pass the iterator to the <a href="https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint"><code translate="no" dir="ltr">tf.train.Checkpoint</code></a> constructor.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">range_ds = tf.data.Dataset.range(20) iterator = iter(range_ds) ckpt = tf.train.Checkpoint(step=tf.Variable(0), iterator=iterator) manager = tf.train.CheckpointManager(ckpt, '/tmp/my_ckpt', max_to_keep=3) print([next(iterator).numpy() for _ in range(5)]) save_path = manager.save() print([next(iterator).numpy() for _ in range(5)]) ckpt.restore(manager.latest_checkpoint) print([next(iterator).numpy() for _ in range(5)]) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] [5, 6, 7, 8, 9] </pre> <aside class="note"><strong>Note:</strong><span> It is not possible to checkpoint an iterator which relies on an external state, such as a <a
href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a>. Attempting to do so will raise an exception complaining about the external state.</span></aside> <h2 id="using_tfdata_with_tfkeras" data-text="Using tf.data with tf.keras" tabindex="-1">Using <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> with <a href="https://www.tensorflow.org/api_docs/python/tf/keras"><code translate="no" dir="ltr">tf.keras</code></a></h2> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/keras"><code translate="no" dir="ltr">tf.keras</code></a> API simplifies many aspects of creating and executing machine learning models. Its <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit"><code translate="no" dir="ltr">Model.fit</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#evaluate"><code translate="no" dir="ltr">Model.evaluate</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict"><code translate="no" dir="ltr">Model.predict</code></a> APIs support datasets as inputs. 
Here is a quick dataset and model setup:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">train, test = tf.keras.datasets.fashion_mnist.load_data() images, labels = train images = images/255.0 labels = labels.astype(np.int32) </code></pre><pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">fmnist_train_ds = tf.data.Dataset.from_tensor_slices((images, labels)) fmnist_train_ds = fmnist_train_ds.shuffle(5000).batch(32) model = tf.keras.Sequential([ tf.keras.layers.Flatten(), tf.keras.layers.Dense(10) ]) model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) </code></pre> <p>Passing a dataset of <code translate="no" dir="ltr">(feature, label)</code> pairs is all that's needed for <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit"><code translate="no" dir="ltr">Model.fit</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#evaluate"><code translate="no" dir="ltr">Model.evaluate</code></a>:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">model.fit(fmnist_train_ds, epochs=2) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Epoch 1/2 WARNING: All log messages before absl::InitializeLog() is called are written to STDERR I0000 00:00:1723685884.693688 45100 service.cc:146] XLA service 0x7f35cc006690 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices: I0000 00:00:1723685884.693721 45100 service.cc:154] StreamExecutor device (0): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693725 45100 service.cc:154] StreamExecutor device (1): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693728 45100 service.cc:154] StreamExecutor device (2): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693731 45100 service.cc:154] StreamExecutor device (3): Tesla T4, Compute Capability 7.5 136/1875 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - accuracy: 0.5241 - loss: 1.4346 I0000 00:00:1723685885.241810 45100 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process. 1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - accuracy: 0.7449 - loss: 0.7643 Epoch 2/2 1875/1875 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - accuracy: 0.8381 - loss: 0.4704 &lt;keras.src.callbacks.history.History at 0x7f373f583250&gt; </pre> <p>If you pass an infinite dataset, for example by calling <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a>, you just need to also pass the <code translate="no" dir="ltr">steps_per_epoch</code> argument:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">model.fit(fmnist_train_ds.repeat(), epochs=2, steps_per_epoch=20) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> Epoch 1/2 20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8254 - loss: 0.4682 Epoch 2/2 20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8622 - loss: 0.4263 &lt;keras.src.callbacks.history.History at 0x7f37443e1190&gt; </pre> <p>For evaluation, you can pass a finite dataset directly to <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#evaluate"><code translate="no" dir="ltr">Model.evaluate</code></a> without specifying the number of steps:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">loss, accuracy = model.evaluate(fmnist_train_ds) print("Loss :", loss) print("Accuracy :", accuracy) </code></pre> <pre class="tfo-notebook-code-cell-output"
translate="no" dir="ltr"> 1875/1875 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - accuracy: 0.8504 - loss: 0.4343 Loss : 0.4353208839893341 Accuracy : 0.849216639995575 </pre> <p>For long datasets, set the number of steps to evaluate:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">loss, accuracy = model.evaluate(fmnist_train_ds.repeat(), steps=10) print("Loss :", loss) print("Accuracy :", accuracy) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8411 - loss: 0.5209 Loss : 0.46679750084877014 Accuracy : 0.84375 </pre> <p>The labels are not required when calling <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict"><code translate="no" dir="ltr">Model.predict</code></a>.</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">predict_ds = tf.data.Dataset.from_tensor_slices(images).batch(32) result = model.predict(predict_ds, steps = 10) print(result.shape) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step (320, 10) </pre> <p>But the labels are ignored if you do pass a dataset containing them:</p> <pre class="prettyprint lang-python" translate="no" dir="ltr"><code translate="no" dir="ltr">result = model.predict(fmnist_train_ds, steps = 10) print(result.shape) </code></pre> <pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr"> 10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step (320, 10) </pre> </div> <devsite-thumb-rating position="footer"> </devsite-thumb-rating> <div class="devsite-floating-action-buttons"> </div> </article> <devsite-content-footer class="nocontent"> <p>Except as otherwise noted, the content of this page is licensed under the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 License</a>, and code samples are licensed under the <a 
href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>. For details, see the <a href="https://developers.google.com/site-policies">Google Developers Site Policies</a>. Java is a registered trademark of Oracle and/or its affiliates.</p> <p>Last updated 2024-08-15 UTC.</p> </devsite-content-footer> <devsite-notification > </devsite-notification> <div class="devsite-content-data"> <template class="devsite-content-data-template"> [[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Missing the information I need","missingTheInformationINeed","thumb-down"],["Too complicated / too many steps","tooComplicatedTooManySteps","thumb-down"],["Out of date","outOfDate","thumb-down"],["Samples / code issue","samplesCodeIssue","thumb-down"],["Other","otherDown","thumb-down"]],["Last updated 2024-08-15 UTC."],[],[]] </template> </div> </devsite-content> </main> <devsite-footer-promos class="devsite-footer"> </devsite-footer-promos> <devsite-footer-linkboxes class="devsite-footer"> <nav class="devsite-footer-linkboxes nocontent" aria-label="Footer links"> <ul class="devsite-footer-linkboxes-list"> <li class="devsite-footer-linkbox "> <h3 class="devsite-footer-linkbox-heading no-link">Stay connected</h3> <ul class="devsite-footer-linkbox-list"> <li class="devsite-footer-linkbox-item"> <a href="//blog.tensorflow.org" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 1)" > Blog </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//discuss.tensorflow.org" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 2)" > Forum </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link 
(index 3)" > GitHub </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//twitter.com/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 4)" > Twitter </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//youtube.com/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 5)" > YouTube </a> </li> </ul> </li> <li class="devsite-footer-linkbox "> <h3 class="devsite-footer-linkbox-heading no-link">Support</h3> <ul class="devsite-footer-linkbox-list"> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/tensorflow/issues" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 1)" > Issue tracker </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/tensorflow/blob/master/RELEASE.md" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 2)" > Release notes </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//stackoverflow.com/questions/tagged/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 3)" > Stack Overflow </a> </li> <li class="devsite-footer-linkbox-item"> <a href="/extras/tensorflow_brand_guidelines.pdf" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 4)" > Brand guidelines </a> </li> <li class="devsite-footer-linkbox-item"> <a href="/about/bib" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 5)" > Cite TensorFlow </a> </li> </ul> </li> </ul> </nav> </devsite-footer-linkboxes> <devsite-footer-utility class="devsite-footer"> <div 
class="devsite-footer-utility nocontent"> <nav class="devsite-footer-utility-links" aria-label="Utility links"> <ul class="devsite-footer-utility-list"> <li class="devsite-footer-utility-item "> <a class="devsite-footer-utility-link gc-analytics-event" href="//policies.google.com/terms" data-category="Site-Wide Custom Events" data-label="Footer Terms link" > Terms </a> </li> <li class="devsite-footer-utility-item "> <a class="devsite-footer-utility-link gc-analytics-event" href="//policies.google.com/privacy" data-category="Site-Wide Custom Events" data-label="Footer Privacy link" > Privacy </a> </li> <li class="devsite-footer-utility-item glue-cookie-notification-bar-control"> <a class="devsite-footer-utility-link gc-analytics-event" href="#" data-category="Site-Wide Custom Events" data-label="Footer Manage cookies link" aria-hidden="true" > Manage cookies </a> </li> <li class="devsite-footer-utility-item devsite-footer-utility-button"> <span class="devsite-footer-utility-description">Sign up for the TensorFlow newsletter</span> <a class="devsite-footer-utility-link gc-analytics-event" href="//www.tensorflow.org/subscribe" data-category="Site-Wide Custom Events" data-label="Footer Subscribe link" > Subscribe </a> </li> </ul> <devsite-language-selector> <ul role="presentation"> <li role="presentation"> <a role="menuitem" lang="en" >English</a> </li> <li role="presentation"> <a role="menuitem" lang="es_419" >Español – América Latina</a> </li> <li role="presentation"> <a role="menuitem" lang="fr" >Français</a> </li> <li role="presentation"> <a role="menuitem" lang="id" >Indonesia</a> </li> <li role="presentation"> <a role="menuitem" lang="it" >Italiano</a> </li> <li role="presentation"> <a role="menuitem" lang="pl" >Polski</a> </li> <li role="presentation"> <a role="menuitem" lang="pt_br" >Português – Brasil</a> </li> <li role="presentation"> <a role="menuitem" lang="vi" >Tiếng Việt</a> </li> <li role="presentation"> <a role="menuitem" lang="tr" >Türkçe</a> </li> <li 
role="presentation"> <a role="menuitem" lang="ru" >Русский</a> </li> <li role="presentation"> <a role="menuitem" lang="he" >עברית</a> </li> <li role="presentation"> <a role="menuitem" lang="ar" >العربيّة</a> </li> <li role="presentation"> <a role="menuitem" lang="fa" >فارسی</a> </li> <li role="presentation"> <a role="menuitem" lang="hi" >हिंदी</a> </li> <li role="presentation"> <a role="menuitem" lang="bn" >বাংলা</a> </li> <li role="presentation"> <a role="menuitem" lang="th" >ภาษาไทย</a> </li> <li role="presentation"> <a role="menuitem" lang="zh_cn" >中文 – 简体</a> </li> <li role="presentation"> <a role="menuitem" lang="ja" >日本語</a> </li> <li role="presentation"> <a role="menuitem" lang="ko" >한국어</a> </li> </ul> </devsite-language-selector> </nav> </div> </devsite-footer-utility> <devsite-panel></devsite-panel> </section></section> <devsite-sitemask></devsite-sitemask> <devsite-snackbar></devsite-snackbar> <devsite-tooltip ></devsite-tooltip> <devsite-heading-link></devsite-heading-link> <devsite-analytics> <script type="application/json" analytics>[{"dimensions": {"dimension12": false, "dimension5": "en", "dimension1": "Signed out", "dimension6": "en", "dimension4": "TensorFlow Core", "dimension3": false}, "gaid": "UA-69864048-1", "metrics": {"ratings_value": "metric1", "ratings_count": "metric2"}, "purpose": 0}]</script> <script type="application/json" tag-management>{"at": "True", "ga4": [], "ga4p": [], "gtm": [{"id": "GTM-MXSL34P", "purpose": 0}], "parameters": {"internalUser": "False", "language": {"machineTranslated": "False", "requested": "en", "served": "en"}, "pageType": "article", "projectName": "TensorFlow Core", "signedIn": "False", "tenant": "tensorflow", "recommendations": {"sourcePage": "", "sourceType": 0, "sourceRank": 0, "sourceIdenticalDescriptions": 0, "sourceTitleWords": 0, "sourceDescriptionWords": 0, "experiment": ""}, "experiment": {"ids": ""}}}</script> </devsite-analytics> <devsite-badger></devsite-badger> <script 
nonce="EhEQzViVhcJJTIoLfoT1dcRunhgUQE"> (function(d,e,v,s,i,t,E){d['GoogleDevelopersObject']=i; t=e.createElement(v);t.async=1;t.src=s;E=e.getElementsByTagName(v)[0]; E.parentNode.insertBefore(t,E);})(window, document, 'script', 'https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/js/app_loader.js', '[15,"en",null,"/js/devsite_app_module.js","https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625","https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow","https://tensorflow-dot-devsite-v2-prod-3p.appspot.com",null,null,["/_pwa/tensorflow/manifest.json","https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/images/video-placeholder.svg","https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/favicon.png","https://www.gstatic.com/devrel-devsite/prod/v870e399c64f7c43c99a3043db4b3a74327bb93d0914e84a0c3dba90bbfd67625/tensorflow/images/lockup.svg","https://fonts.googleapis.com/css?family=Google+Sans:400,500|Roboto:400,400italic,500,500italic,700,700italic|Roboto+Mono:400,500,700&display=swap"],1,null,[1,6,8,12,14,17,21,25,50,52,63,70,75,76,80,87,91,92,93,97,98,100,101,102,103,104,105,107,108,109,110,112,113,116,117,118,120,122,124,125,126,127,129,130,131,132,133,134,135,136,138,140,141,147,148,149,151,152,156,157,158,159,161,163,164,168,169,170,179,180,182,183,186,191,193,196],"AIzaSyCNm9YxQumEXwGJgTDjxoxXK6m1F-9720Q","AIzaSyCc76DZePGtoyUjqKrLdsMGk_ry7sljLbY","www.tensorflow.org","AIzaSyB9bqgQ2t11WJsOX8qNsCQ6U-w91mmqF-I","AIzaSyAdYnStPdzjcJJtQ0mvIaeaMKj7_t6J_Fg",null,null,null,["Concierge__enable_pushui","MiscFeatureFlags__developers_footer_image","Profiles__require_profile_eligibility_for_signin","Cloud__enable_cloud_shell","Cloud__enable_free_trial_server_ca
ll","EngEduTelemetry__enable_engedu_telemetry","Profiles__enable_dashboard_curated_recommendations","BookNav__enable_tenant_cache_key","Search__enable_suggestions_from_borg","DevPro__enable_cloud_innovators_plus","Cloud__enable_cloud_facet_chat","Cloud__enable_legacy_calculator_redirect","MiscFeatureFlags__enable_firebase_utm","MiscFeatureFlags__enable_explain_this_code","Profiles__enable_complete_playlist_endpoint","Search__enable_ai_eligibility_checks","CloudShell__cloud_shell_button","MiscFeatureFlags__developers_footer_dark_image","Cloud__enable_cloud_shell_fte_user_flow","Search__enable_dynamic_content_confidential_banner","Profiles__enable_page_saving","Analytics__enable_clearcut_logging","Profiles__enable_awarding_url","Profiles__enable_release_notes_notifications","Cloud__enable_cloudx_experiment_ids","Profiles__enable_profile_collections","Profiles__enable_public_developer_profiles","DevPro__enable_developer_subscriptions","MiscFeatureFlags__enable_project_variables","MiscFeatureFlags__enable_view_transitions","Cloud__enable_cloudx_ping","Profiles__enable_recognition_badges","TpcFeatures__enable_mirror_tenant_redirects","Cloud__enable_llm_concierge_chat","Profiles__enable_developer_profiles_callout","MiscFeatureFlags__enable_variable_operator","Search__enable_page_map","TpcFeatures__enable_required_headers","MiscFeatureFlags__emergency_css","Cloud__enable_cloud_dlp_service","CloudShell__cloud_code_overflow_menu","SignIn__enable_refresh_access_tokens","Profiles__enable_completecodelab_endpoint","Experiments__reqs_query_experiments"],null,null,"AIzaSyA58TaKli1DculwmAmbpzLVGuWc8eCQgQc","https://developerscontentserving-pa.googleapis.com","AIzaSyDWBU60w0P9hEkr29kkksYs8Z7gvZ8u_wc","https://developerscontentsearch-pa.googleapis.com",2,4,null,"https://developerprofiles-pa.googleapis.com",[15,"tensorflow","TensorFlow","www.tensorflow.org",null,"tensorflow-dot-devsite-v2-prod-3p.appspot.com",null,null,[null,1,null,null,null,null,null,null,null,null,null,[1],null,nul
l,null,null,null,null,[1],[1,null,null,[1]],null,null,null,[1,null,1],[1,1,null,1,1]],null,[25,null,null,null,null,null,"/images/lockup.svg","/images/logo.png",null,null,null,1,1,null,null,null,null,null,null,null,null,1,null,null,null,null,[]],[],null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,[6,1],null,[[],[1,1]],[[["UA-69864048-1"],["UA-69864048-4"],null,null,["UA-69864048-5"],["GTM-MXSL34P"],null,null,[["UA-69864048-1",1]],null,[["UA-69864048-5",1]],[["GTM-MXSL34P",1]],1],[[3,2],[5,4],[4,3],[12,8],[6,5],[1,1]],[[1,1],[2,2]]],null,4]]') </script> <devsite-a11y-announce></devsite-a11y-announce> </body> </html>