<!-- CINXE.COM
tf.data: Build TensorFlow input pipelines | TensorFlow Core -->
<!doctype html> <html lang="en" dir="ltr"> <head> <meta name="google-signin-client-id" content="157101835696-ooapojlodmuabs2do2vuhhnf90bccmoi.apps.googleusercontent.com"> <meta name="google-signin-scope" content="profile email https://www.googleapis.com/auth/developerprofiles https://www.googleapis.com/auth/developerprofiles.award"> <meta property="og:site_name" content="TensorFlow"> <meta property="og:type" content="website"><meta name="theme-color" content="#ff6f00"><meta charset="utf-8"> <meta content="IE=Edge" http-equiv="X-UA-Compatible"> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="manifest" href="/_pwa/tensorflow/manifest.json" crossorigin="use-credentials"> <link rel="preconnect" href="//www.gstatic.com" crossorigin> <link rel="preconnect" href="//fonts.gstatic.com" crossorigin> <link rel="preconnect" href="//fonts.googleapis.com" crossorigin> <link rel="preconnect" href="//apis.google.com" crossorigin> <link rel="preconnect" href="//www.google-analytics.com" crossorigin><link rel="stylesheet" href="//fonts.googleapis.com/css?family=Google+Sans:400,500|Roboto:400,400italic,500,500italic,700,700italic|Roboto+Mono:400,500,700&display=swap"> <link rel="stylesheet" href="//fonts.googleapis.com/css2?family=Material+Icons&family=Material+Symbols+Outlined&display=block"><link rel="stylesheet" href="https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/css/app.css"> <link rel="shortcut icon" href="https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/favicon.png"> <link rel="apple-touch-icon" href="https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/apple-touch-icon-180x180.png"><link rel="canonical" href="https://www.tensorflow.org/guide/data"><link rel="search" type="application/opensearchdescription+xml" 
title="TensorFlow" href="https://www.tensorflow.org/s/opensearch.xml"> <link rel="alternate" hreflang="en" href="https://www.tensorflow.org/guide/data" /><link rel="alternate" hreflang="x-default" href="https://www.tensorflow.org/guide/data" /><link rel="alternate" hreflang="ar" href="https://www.tensorflow.org/guide/data?hl=ar" /><link rel="alternate" hreflang="bn" href="https://www.tensorflow.org/guide/data?hl=bn" /><link rel="alternate" hreflang="zh-Hans" href="https://www.tensorflow.org/guide/data?hl=zh-cn" /><link rel="alternate" hreflang="fa" href="https://www.tensorflow.org/guide/data?hl=fa" /><link rel="alternate" hreflang="fr" href="https://www.tensorflow.org/guide/data?hl=fr" /><link rel="alternate" hreflang="he" href="https://www.tensorflow.org/guide/data?hl=he" /><link rel="alternate" hreflang="hi" href="https://www.tensorflow.org/guide/data?hl=hi" /><link rel="alternate" hreflang="id" href="https://www.tensorflow.org/guide/data?hl=id" /><link rel="alternate" hreflang="it" href="https://www.tensorflow.org/guide/data?hl=it" /><link rel="alternate" hreflang="ja" href="https://www.tensorflow.org/guide/data?hl=ja" /><link rel="alternate" hreflang="ko" href="https://www.tensorflow.org/guide/data?hl=ko" /><link rel="alternate" hreflang="pl" href="https://www.tensorflow.org/guide/data?hl=pl" /><link rel="alternate" hreflang="pt-BR" href="https://www.tensorflow.org/guide/data?hl=pt-br" /><link rel="alternate" hreflang="ru" href="https://www.tensorflow.org/guide/data?hl=ru" /><link rel="alternate" hreflang="es-419" href="https://www.tensorflow.org/guide/data?hl=es-419" /><link rel="alternate" hreflang="th" href="https://www.tensorflow.org/guide/data?hl=th" /><link rel="alternate" hreflang="tr" href="https://www.tensorflow.org/guide/data?hl=tr" /><link rel="alternate" hreflang="vi" href="https://www.tensorflow.org/guide/data?hl=vi" /><title>tf.data: Build TensorFlow input pipelines | TensorFlow Core</title> <meta property="og:title" content="tf.data: Build 
TensorFlow input pipelines | TensorFlow Core"><meta property="og:url" content="https://www.tensorflow.org/guide/data"><meta property="og:image" content="https://www.tensorflow.org/static/images/tf_logo_social.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="675"><meta property="og:locale" content="en"><meta name="twitter:card" content="summary_large_image"><script type="application/ld+json"> { "@context": "https://schema.org", "@type": "Article", "headline": "tf.data: Build TensorFlow input pipelines" } </script><script type="application/ld+json"> { "@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{ "@type": "ListItem", "position": 1, "name": "TensorFlow Core", "item": "https://www.tensorflow.org/tutorials" },{ "@type": "ListItem", "position": 2, "name": "tf.data: Build TensorFlow input pipelines", "item": "https://www.tensorflow.org/guide/data" }] } </script> <link rel="stylesheet" href="/extras.css"></head> <body class="" template="page" theme="tensorflow-theme" type="article" layout="docs" display-toc pending> <devsite-progress type="indeterminate" id="app-progress"></devsite-progress> <a href="#main-content" class="skip-link button"> Skip to main content </a> <section class="devsite-wrapper"> <devsite-cookie-notification-bar></devsite-cookie-notification-bar><devsite-header role="banner"> <div class="devsite-header--inner nocontent"> <div class="devsite-top-logo-row-wrapper-wrapper"> <div class="devsite-top-logo-row-wrapper"> <div class="devsite-top-logo-row"> <button type="button" id="devsite-hamburger-menu" class="devsite-header-icon-button button-flat material-icons gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Navigation menu button" visually-hidden aria-label="Open menu"> </button> <div class="devsite-product-name-wrapper"> <a href="/" class="devsite-site-logo-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Site logo" 
track-type="globalNav" track-name="tensorFlow" track-metadata-position="nav" track-metadata-eventDetail="nav"> <picture> <img src="https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/lockup.svg" class="devsite-site-logo" alt="TensorFlow"> </picture> </a> <span class="devsite-product-name"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> </li> </ul> </span> </div> <div class="devsite-top-logo-row-middle"> <div class="devsite-header-upper-tabs"> <devsite-tabs class="upper-tabs"> <nav class="devsite-tabs-wrapper" aria-label="Upper tabs"> <tab > <a href="https://www.tensorflow.org/install" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/install" track-type="nav" track-metadata-position="nav - install" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Install" track-name="install" > Install </a> </tab> <tab class="devsite-dropdown devsite-active "> <a href="https://www.tensorflow.org/learn" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/learn" track-type="nav" track-metadata-position="nav - learn" track-metadata-module="primary nav" aria-label="Learn, selected" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" > Learn </a> <button aria-haspopup="menu" aria-expanded="false" aria-label="Dropdown menu for Learn" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/learn" track-metadata-position="nav - learn" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></button> <div class="devsite-tabs-dropdown" role="menu" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div 
class="devsite-tabs-dropdown-column tfo-menu-column-learn"> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/learn" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/learn" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Introduction </div> <div class="devsite-nav-item-description"> New to TensorFlow? </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/tutorials" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tutorials" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Tutorials </div> <div class="devsite-nav-item-description"> Learn how to use TensorFlow with end-to-end examples </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/guide" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/guide" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Guide </div> <div class="devsite-nav-item-description"> Learn framework concepts and components </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/learn-ml" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/learn-ml" track-metadata-position="nav - learn" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Learn ML </div> <div class="devsite-nav-item-description"> Educational resources to master your path with TensorFlow </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/api" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/api" track-type="nav" track-metadata-position="nav - api" 
track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" > API </a> <button aria-haspopup="menu" aria-expanded="false" aria-label="Dropdown menu for API" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/api" track-metadata-position="nav - api" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></button> <div class="devsite-tabs-dropdown" role="menu" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/api/stable" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/api/stable" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow (v2.16.1) </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/versions" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/versions" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Versions… </div> </a> </li> </ul> </div> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://js.tensorflow.org/api/latest/" track-type="nav" track-metadata-eventdetail="https://js.tensorflow.org/api/latest/" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow.js </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/lite/api_docs" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/lite/api_docs" track-metadata-position="nav - api" 
track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TensorFlow Lite </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/tfx/api_docs" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tfx/api_docs" track-metadata-position="nav - api" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> TFX </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/resources/models-datasets" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" track-type="nav" track-metadata-position="nav - ecosystem" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" > Ecosystem </a> <button aria-haspopup="menu" aria-expanded="false" aria-label="Dropdown menu for Ecosystem" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" track-metadata-position="nav - ecosystem" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></button> <div class="devsite-tabs-dropdown" role="menu" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-title" role="heading" tooltip>LIBRARIES</li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/js" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/js" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TensorFlow.js </div> <div class="devsite-nav-item-description"> 
Develop web ML applications in JavaScript </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/lite" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/lite" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TensorFlow Lite </div> <div class="devsite-nav-item-description"> Deploy ML on mobile, microcontrollers and other edge devices </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/tfx" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/tfx" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> TFX </div> <div class="devsite-nav-item-description"> Build production ML pipelines </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/libraries-extensions" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/libraries-extensions" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="libraries" tooltip > <div class="devsite-nav-item-title"> All libraries </div> <div class="devsite-nav-item-description"> Create advanced models and extend TensorFlow </div> </a> </li> </ul> </div> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-title" role="heading" tooltip>RESOURCES</li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/models-datasets" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/models-datasets" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Models & datasets </div> 
<div class="devsite-nav-item-description"> Pre-trained models and datasets built by Google and the community </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/tools" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/tools" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Tools </div> <div class="devsite-nav-item-description"> Tools to support and accelerate TensorFlow workflows </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/responsible_ai" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/responsible_ai" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Responsible AI </div> <div class="devsite-nav-item-description"> Resources for every stage of the ML workflow </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/resources/recommendation-systems" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/resources/recommendation-systems" track-metadata-position="nav - ecosystem" track-metadata-module="tertiary nav" track-metadata-module_headline="resources" tooltip > <div class="devsite-nav-item-title"> Recommendation systems </div> <div class="devsite-nav-item-description"> Build recommendation systems with open source tools </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/community" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/community" track-type="nav" track-metadata-position="nav - community" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" > 
Community </a> <button aria-haspopup="menu" aria-expanded="false" aria-label="Dropdown menu for Community" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community" track-metadata-position="nav - community" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></button> <div class="devsite-tabs-dropdown" role="menu" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/community/groups" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community/groups" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Groups </div> <div class="devsite-nav-item-description"> User groups, interest groups and mailing lists </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/community/contribute" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/community/contribute" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Contribute </div> <div class="devsite-nav-item-description"> Guide for contributing to code and documentation </div> </a> </li> <li class="devsite-nav-item"> <a href="https://blog.tensorflow.org/" track-type="nav" track-metadata-eventdetail="https://blog.tensorflow.org/" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Blog </div> <div class="devsite-nav-item-description"> Stay up to date with all things TensorFlow </div> </a> </li> <li class="devsite-nav-item"> <a href="https://discuss.tensorflow.org" track-type="nav" 
track-metadata-eventdetail="https://discuss.tensorflow.org" track-metadata-position="nav - community" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> Forum </div> <div class="devsite-nav-item-description"> Discussion platform for the TensorFlow community </div> </a> </li> </ul> </div> </div> </div> </tab> <tab class="devsite-dropdown "> <a href="https://www.tensorflow.org/about" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/about" track-type="nav" track-metadata-position="nav - why tensorflow" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" > Why TensorFlow </a> <button aria-haspopup="menu" aria-expanded="false" aria-label="Dropdown menu for Why TensorFlow" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about" track-metadata-position="nav - why tensorflow" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" class="devsite-tabs-dropdown-toggle devsite-icon devsite-icon-arrow-drop-down"></button> <div class="devsite-tabs-dropdown" role="menu" aria-label="submenu" hidden> <div class="devsite-tabs-dropdown-content"> <div class="devsite-tabs-dropdown-column "> <ul class="devsite-tabs-dropdown-section "> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/about" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about" track-metadata-position="nav - why tensorflow" track-metadata-module="tertiary nav" tooltip > <div class="devsite-nav-item-title"> About </div> </a> </li> <li class="devsite-nav-item"> <a href="https://www.tensorflow.org/about/case-studies" track-type="nav" track-metadata-eventdetail="https://www.tensorflow.org/about/case-studies" track-metadata-position="nav - why tensorflow" track-metadata-module="tertiary nav" tooltip > <div 
class="devsite-nav-item-title"> Case studies </div> </a> </li> </ul> </div> </div> </div> </tab> </nav> </devsite-tabs> </div> <devsite-search enable-signin enable-search enable-suggestions enable-query-completion project-name="TensorFlow Core" tenant-name="TensorFlow" > <form class="devsite-search-form" action="https://www.tensorflow.org/s/results" method="GET"> <div class="devsite-search-container"> <button type="button" search-open class="devsite-search-button devsite-header-icon-button button-flat material-icons" aria-label="Open search"></button> <div class="devsite-searchbox"> <input aria-activedescendant="" aria-autocomplete="list" aria-label="Search" aria-expanded="false" aria-haspopup="listbox" autocomplete="off" class="devsite-search-field devsite-search-query" name="q" placeholder="Search" role="combobox" type="text" value="" > <div class="devsite-search-image material-icons" aria-hidden="true"> </div> <div class="devsite-search-shortcut-icon-container" aria-hidden="true"> <kbd class="devsite-search-shortcut-icon">/</kbd> </div> </div> </div> </form> <button type="button" search-close class="devsite-search-button devsite-header-icon-button button-flat material-icons" aria-label="Close search"></button> </devsite-search> </div> <devsite-language-selector> <ul role="presentation"> <li role="presentation"> <a role="menuitem" lang="en" >English</a> </li> <li role="presentation"> <a role="menuitem" lang="es_419" >Español – América Latina</a> </li> <li role="presentation"> <a role="menuitem" lang="fr" >Français</a> </li> <li role="presentation"> <a role="menuitem" lang="id" >Indonesia</a> </li> <li role="presentation"> <a role="menuitem" lang="it" >Italiano</a> </li> <li role="presentation"> <a role="menuitem" lang="pl" >Polski</a> </li> <li role="presentation"> <a role="menuitem" lang="pt_br" >Português – Brasil</a> </li> <li role="presentation"> <a role="menuitem" lang="vi" >Tiếng Việt</a> </li> <li role="presentation"> <a role="menuitem" lang="tr" 
>Türkçe</a> </li> <li role="presentation"> <a role="menuitem" lang="ru" >Русский</a> </li> <li role="presentation"> <a role="menuitem" lang="he" >עברית</a> </li> <li role="presentation"> <a role="menuitem" lang="ar" >العربيّة</a> </li> <li role="presentation"> <a role="menuitem" lang="fa" >فارسی</a> </li> <li role="presentation"> <a role="menuitem" lang="hi" >हिंदी</a> </li> <li role="presentation"> <a role="menuitem" lang="bn" >বাংলা</a> </li> <li role="presentation"> <a role="menuitem" lang="th" >ภาษาไทย</a> </li> <li role="presentation"> <a role="menuitem" lang="zh_cn" >中文 – 简体</a> </li> <li role="presentation"> <a role="menuitem" lang="ja" >日本語</a> </li> <li role="presentation"> <a role="menuitem" lang="ko" >한국어</a> </li> </ul> </devsite-language-selector> <a class="devsite-header-link devsite-top-button button gc-analytics-event" href="//github.com/tensorflow" data-category="Site-Wide Custom Events" data-label="Site header link" > GitHub </a> <devsite-user enable-profiles id="devsite-user"> <span class="button devsite-top-button" aria-hidden="true" visually-hidden>Sign in</span> </devsite-user> </div> </div> </div> <div class="devsite-collapsible-section "> <div class="devsite-header-background"> <div class="devsite-product-id-row" > <div class="devsite-product-description-row"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> <a href="https://www.tensorflow.org/tutorials" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Lower Header" data-value="1" track-type="globalNav" track-name="breadcrumb" track-metadata-position="1" track-metadata-eventdetail="TensorFlow Core" > TensorFlow Core </a> </li> </ul> </div> </div> <div class="devsite-doc-set-nav-row"> <devsite-tabs class="lower-tabs"> <nav class="devsite-tabs-wrapper" aria-label="Lower tabs"> <tab > <a href="https://www.tensorflow.org/tutorials" class="devsite-tabs-content gc-analytics-event " 
track-metadata-eventdetail="https://www.tensorflow.org/tutorials" track-type="nav" track-metadata-position="nav - tutorials" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Tutorials" track-name="tutorials" > Tutorials </a> </tab> <tab class="devsite-active"> <a href="https://www.tensorflow.org/guide" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/guide" track-type="nav" track-metadata-position="nav - guide" track-metadata-module="primary nav" aria-label="Guide, selected" data-category="Site-Wide Custom Events" data-label="Tab: Guide" track-name="guide" > Guide </a> </tab> <tab > <a href="https://www.tensorflow.org/guide/migrate" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://www.tensorflow.org/guide/migrate" track-type="nav" track-metadata-position="nav - migrate to tf2" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: Migrate to TF2" track-name="migrate to tf2" > Migrate to TF2 </a> </tab> <tab > <a href="https://github.com/tensorflow/docs/tree/master/site/en/r1" class="devsite-tabs-content gc-analytics-event " track-metadata-eventdetail="https://github.com/tensorflow/docs/tree/master/site/en/r1" track-type="nav" track-metadata-position="nav - tf 1 ↗" track-metadata-module="primary nav" data-category="Site-Wide Custom Events" data-label="Tab: TF 1 ↗" track-name="tf 1 ↗" > TF 1 ↗ </a> </tab> </nav> </devsite-tabs> </div> </div> </div> </div> </devsite-header> <devsite-book-nav scrollbars > <div class="devsite-book-nav-filter" > <span class="filter-list-icon material-icons" aria-hidden="true"></span> <input type="text" placeholder="Filter" aria-label="Type to filter" role="searchbox"> <span class="filter-clear-button hidden" data-title="Clear filter" aria-label="Clear filter" role="button" tabindex="0"></span> </div> <nav class="devsite-book-nav devsite-nav nocontent" aria-label="Side 
menu"> <div class="devsite-mobile-header"> <button type="button" id="devsite-close-nav" class="devsite-header-icon-button button-flat material-icons gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Close navigation" aria-label="Close navigation"> </button> <div class="devsite-product-name-wrapper"> <a href="/" class="devsite-site-logo-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Site logo" track-type="globalNav" track-name="tensorFlow" track-metadata-position="nav" track-metadata-eventDetail="nav"> <picture> <img src="https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/lockup.svg" class="devsite-site-logo" alt="TensorFlow"> </picture> </a> <span class="devsite-product-name"> <ul class="devsite-breadcrumb-list" > <li class="devsite-breadcrumb-item "> </li> </ul> </span> </div> </div> <div class="devsite-book-nav-wrapper"> <div class="devsite-mobile-nav-top"> <ul class="devsite-nav-list"> <li class="devsite-nav-item"> <a href="/install" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Install" track-name="install" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Install" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Install </span> <span class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a href="/learn" class="devsite-nav-title gc-analytics-event devsite-nav-active" data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Learn" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Learn </span> </a> <ul 
class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Learn" track-name="learn" > <span class="devsite-nav-text" tooltip menu="Learn"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Learn"> </span> </span> </li> </ul> <ul class="devsite-nav-responsive-tabs"> <li class="devsite-nav-item"> <a href="/tutorials" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Tutorials" track-name="tutorials" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tutorials" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tutorials </span> <span class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide" class="devsite-nav-title gc-analytics-event devsite-nav-has-children devsite-nav-active" data-category="Site-Wide Custom Events" data-label="Tab: Guide" track-name="guide" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Guide" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip menu="_book"> Guide </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="_book"> </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide/migrate" class="devsite-nav-title gc-analytics-event devsite-nav-has-children " data-category="Site-Wide Custom Events" data-label="Tab: Migrate to TF2" track-name="migrate to tf2" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Migrate to TF2" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Migrate to TF2 </span> <span 
class="devsite-nav-icon material-icons" data-icon="forward" > </span> </a> </li> <li class="devsite-nav-item"> <a href="https://github.com/tensorflow/docs/tree/master/site/en/r1" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: TF 1 ↗" track-name="tf 1 ↗" data-category="Site-Wide Custom Events" data-label="Responsive Tab: TF 1 ↗" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TF 1 ↗ </span> </a> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/api" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" data-category="Site-Wide Custom Events" data-label="Responsive Tab: API" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > API </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: API" track-name="api" > <span class="devsite-nav-text" tooltip menu="API"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="API"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/resources/models-datasets" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" track-name="ecosystem" data-category="Site-Wide Custom Events" data-label="Responsive Tab: Ecosystem" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Ecosystem </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Ecosystem" 
track-name="ecosystem" > <span class="devsite-nav-text" tooltip menu="Ecosystem"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Ecosystem"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/community" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Community </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Community" track-name="community" > <span class="devsite-nav-text" tooltip menu="Community"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Community"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="/about" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" track-type="globalNav" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Why TensorFlow </span> </a> <ul class="devsite-nav-responsive-tabs devsite-nav-has-menu "> <li class="devsite-nav-item"> <span class="devsite-nav-title" tooltip data-category="Site-Wide Custom Events" data-label="Tab: Why TensorFlow" track-name="why tensorflow" > <span class="devsite-nav-text" tooltip menu="Why TensorFlow"> More </span> <span class="devsite-nav-icon material-icons" data-icon="forward" menu="Why TensorFlow"> </span> </span> </li> </ul> </li> <li class="devsite-nav-item"> <a href="//github.com/tensorflow" 
class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: GitHub" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > GitHub </span> </a> </li> </ul> </div> <div class="devsite-mobile-nav-bottom"> <ul class="devsite-nav-list" menu="_book"> <li class="devsite-nav-item"><a href="/guide" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide" ><span class="devsite-nav-text" tooltip>TensorFlow guide</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>TensorFlow basics</span> </div></li> <li class="devsite-nav-item"><a href="/guide/basics" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/basics" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/basics" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/guide/tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tensor" ><span class="devsite-nav-text" tooltip>Tensors</span></a></li> <li class="devsite-nav-item"><a href="/guide/variable" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/variable" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/variable" ><span class="devsite-nav-text" tooltip>Variables</span></a></li> <li class="devsite-nav-item"><a href="/guide/autodiff" class="devsite-nav-title 
gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/autodiff" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/autodiff" ><span class="devsite-nav-text" tooltip>Automatic differentiation</span></a></li> <li class="devsite-nav-item"><a href="/guide/intro_to_graphs" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/intro_to_graphs" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/intro_to_graphs" ><span class="devsite-nav-text" tooltip>Graphs and functions</span></a></li> <li class="devsite-nav-item"><a href="/guide/intro_to_modules" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/intro_to_modules" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/intro_to_modules" ><span class="devsite-nav-text" tooltip>Modules, layers, and models</span></a></li> <li class="devsite-nav-item"><a href="/guide/basic_training_loops" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/basic_training_loops" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/basic_training_loops" ><span class="devsite-nav-text" tooltip>Training loops</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Keras</span> </div></li> <li class="devsite-nav-item"><a href="/guide/keras" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a 
href="/guide/keras/sequential_model" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/sequential_model" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/sequential_model" ><span class="devsite-nav-text" tooltip>The Sequential model</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/functional_api" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/functional_api" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/functional_api" ><span class="devsite-nav-text" tooltip>The Functional API</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/training_with_built_in_methods" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/training_with_built_in_methods" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/training_with_built_in_methods" ><span class="devsite-nav-text" tooltip>Training & evaluation with the built-in methods</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/making_new_layers_and_models_via_subclassing" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/making_new_layers_and_models_via_subclassing" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/making_new_layers_and_models_via_subclassing" ><span class="devsite-nav-text" tooltip>Making new layers and models via subclassing</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/serialization_and_saving" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/serialization_and_saving" track-type="bookNav" 
track-name="click" track-metadata-eventdetail="/guide/keras/serialization_and_saving" ><span class="devsite-nav-text" tooltip>Serialization and saving</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/customizing_saving_and_serialization" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/customizing_saving_and_serialization" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/customizing_saving_and_serialization" ><span class="devsite-nav-text" tooltip>Customizing Saving</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/preprocessing_layers" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/preprocessing_layers" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/preprocessing_layers" ><span class="devsite-nav-text" tooltip>Working with preprocessing layers</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/customizing_what_happens_in_fit" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/customizing_what_happens_in_fit" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/customizing_what_happens_in_fit" ><span class="devsite-nav-text" tooltip>Customizing what happens in fit()</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/writing_a_training_loop_from_scratch" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/writing_a_training_loop_from_scratch" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/writing_a_training_loop_from_scratch" ><span class="devsite-nav-text" tooltip>Writing a training loop from scratch</span></a></li> <li 
class="devsite-nav-item"><a href="/guide/keras/working_with_rnns" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/working_with_rnns" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/working_with_rnns" ><span class="devsite-nav-text" tooltip>Working with RNNs</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/understanding_masking_and_padding" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/understanding_masking_and_padding" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/understanding_masking_and_padding" ><span class="devsite-nav-text" tooltip>Understanding masking & padding</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/writing_your_own_callbacks" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/writing_your_own_callbacks" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/writing_your_own_callbacks" ><span class="devsite-nav-text" tooltip>Writing your own callbacks</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/transfer_learning" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/transfer_learning" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/keras/transfer_learning" ><span class="devsite-nav-text" tooltip>Transfer learning & fine-tuning</span></a></li> <li class="devsite-nav-item"><a href="/guide/keras/distributed_training" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/keras/distributed_training" track-type="bookNav" track-name="click" 
track-metadata-eventdetail="/guide/keras/distributed_training" ><span class="devsite-nav-text" tooltip>Multi-GPU and distributed training</span></a></li> <li class="devsite-nav-item devsite-nav-heading devsite-nav-new"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Build with Core</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/guide/core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/quickstart_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/quickstart_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/quickstart_core" ><span class="devsite-nav-text" tooltip>Quickstart for Core</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/logistic_regression_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/logistic_regression_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/logistic_regression_core" ><span class="devsite-nav-text" tooltip>Logistic regression</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/mlp_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/mlp_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/mlp_core" ><span class="devsite-nav-text" tooltip>Multilayer perceptrons</span></a></li> <li 
class="devsite-nav-item"><a href="/guide/core/matrix_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/matrix_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/matrix_core" ><span class="devsite-nav-text" tooltip>Matrix approximation</span></a></li> <li class="devsite-nav-item"><a href="/guide/core/optimizers_core" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/optimizers_core" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/optimizers_core" ><span class="devsite-nav-text" tooltip>Custom optimizers</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/core/distribution" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/core/distribution" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/core/distribution" ><span class="devsite-nav-text" tooltip>DTensor with Core APIs</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>TensorFlow in depth</span> </div></li> <li class="devsite-nav-item"><a href="/guide/tensor_slicing" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tensor_slicing" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tensor_slicing" ><span class="devsite-nav-text" tooltip>Tensor slicing</span></a></li> <li class="devsite-nav-item"><a href="/guide/advanced_autodiff" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/advanced_autodiff" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/advanced_autodiff" ><span class="devsite-nav-text" tooltip>Advanced autodiff</span></a></li> <li class="devsite-nav-item"><a href="/guide/ragged_tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/ragged_tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/ragged_tensor" ><span class="devsite-nav-text" tooltip>Ragged tensor</span></a></li> <li class="devsite-nav-item"><a href="/guide/sparse_tensor" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/sparse_tensor" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/sparse_tensor" ><span class="devsite-nav-text" tooltip>Sparse tensor</span></a></li> <li class="devsite-nav-item"><a href="/guide/random_numbers" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/random_numbers" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/random_numbers" 
><span class="devsite-nav-text" tooltip>Random number generation</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/tf_numpy" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tf_numpy" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tf_numpy" ><span class="devsite-nav-text" tooltip>NumPy API</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-nightly"><a href="/guide/tf_numpy_type_promotion" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tf_numpy_type_promotion" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tf_numpy_type_promotion" ><span class="devsite-nav-text" tooltip>NumPy API Type Promotion</span><span class="devsite-nav-icon material-icons" data-icon="nightly" data-title="Nightly build only" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/dtensor_overview" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/dtensor_overview" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/dtensor_overview" ><span class="devsite-nav-text" tooltip>DTensor concepts</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item"><a href="/guide/effective_tf2" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/effective_tf2" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/effective_tf2" ><span class="devsite-nav-text" tooltip>Thinking in TensorFlow 2</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Customization</span> </div></li> <li class="devsite-nav-item"><a href="/guide/create_op" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/create_op" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/create_op" ><span class="devsite-nav-text" tooltip>Create an op</span></a></li> <li class="devsite-nav-item devsite-nav-experimental"><a href="/guide/extension_type" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/extension_type" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/extension_type" ><span class="devsite-nav-text" tooltip>Extension types</span><span class="devsite-nav-icon material-icons" data-icon="experimental" data-title="Experimental!" 
aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Data input pipelines</span> </div></li> <li class="devsite-nav-item"><a href="/guide/data" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data" ><span class="devsite-nav-text" tooltip>tf.data</span></a></li> <li class="devsite-nav-item"><a href="/guide/data_performance" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data_performance" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data_performance" ><span class="devsite-nav-text" tooltip>Optimize pipeline performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/data_performance_analysis" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/data_performance_analysis" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/data_performance_analysis" ><span class="devsite-nav-text" tooltip>Analyze pipeline performance</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Import and export</span> </div></li> <li class="devsite-nav-item"><a href="/guide/checkpoint" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/checkpoint" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/checkpoint" ><span class="devsite-nav-text" tooltip>Checkpoint</span></a></li> <li class="devsite-nav-item"><a href="/guide/saved_model" class="devsite-nav-title gc-analytics-event" 
data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/saved_model" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/saved_model" ><span class="devsite-nav-text" tooltip>SavedModel</span></a></li> <li class="devsite-nav-item devsite-nav-new"><a href="/guide/jax2tf" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/jax2tf" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/jax2tf" ><span class="devsite-nav-text" tooltip>Import a JAX model using JAX2TF</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Accelerators</span> </div></li> <li class="devsite-nav-item"><a href="/guide/distributed_training" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/distributed_training" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/distributed_training" ><span class="devsite-nav-text" tooltip>Distributed training</span></a></li> <li class="devsite-nav-item"><a href="/guide/gpu" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/gpu" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/gpu" ><span class="devsite-nav-text" tooltip>GPU</span></a></li> <li class="devsite-nav-item"><a href="/guide/tpu" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/tpu" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/tpu" ><span class="devsite-nav-text" tooltip>TPU</span></a></li> <li class="devsite-nav-item 
devsite-nav-heading"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Performance</span> </div></li> <li class="devsite-nav-item"><a href="/guide/function" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/function" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/function" ><span class="devsite-nav-text" tooltip>Better performance with tf.function</span></a></li> <li class="devsite-nav-item"><a href="/guide/profiler" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/profiler" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/profiler" ><span class="devsite-nav-text" tooltip>Profile TensorFlow performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/gpu_performance_analysis" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/gpu_performance_analysis" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/gpu_performance_analysis" ><span class="devsite-nav-text" tooltip>Optimize GPU Performance</span></a></li> <li class="devsite-nav-item"><a href="/guide/graph_optimization" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/graph_optimization" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/graph_optimization" ><span class="devsite-nav-text" tooltip>Graph optimization</span></a></li> <li class="devsite-nav-item"><a href="/guide/mixed_precision" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/mixed_precision" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/mixed_precision" ><span 
class="devsite-nav-text" tooltip>Mixed precision</span></a></li> <li class="devsite-nav-item devsite-nav-heading devsite-nav-new"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Model Garden</span><span class="devsite-nav-icon material-icons" data-icon="new" data-title="New!" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/tfmodels" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels" ><span class="devsite-nav-text" tooltip>Overview</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/orbit" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/orbit" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/orbit" ><span class="devsite-nav-text" tooltip>Training with Orbit</span></a></li> <li class="devsite-nav-item devsite-nav-external"><a href="/tfmodels/nlp" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/nlp" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/nlp" ><span class="devsite-nav-text" tooltip>TFModels - NLP</span><span class="devsite-nav-icon material-icons" data-icon="external" data-title="External" aria-hidden="true"></span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/image_classification" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/image_classification" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/image_classification" ><span class="devsite-nav-text" tooltip>Example: Image classification</span></a></li> <li class="devsite-nav-item"><a 
href="/tfmodels/vision/object_detection" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/object_detection" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/object_detection" ><span class="devsite-nav-text" tooltip>Example: Object Detection</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/semantic_segmentation" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/semantic_segmentation" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/semantic_segmentation" ><span class="devsite-nav-text" tooltip>Example: Semantic Segmentation</span></a></li> <li class="devsite-nav-item"><a href="/tfmodels/vision/instance_segmentation" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /tfmodels/vision/instance_segmentation" track-type="bookNav" track-name="click" track-metadata-eventdetail="/tfmodels/vision/instance_segmentation" ><span class="devsite-nav-text" tooltip>Example: Instance Segmentation</span></a></li> <li class="devsite-nav-item devsite-nav-heading devsite-nav-deprecated"><div class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Estimators</span><span class="devsite-nav-icon material-icons" data-icon="deprecated" data-title="Deprecated" aria-hidden="true"></span> </div></li> <li class="devsite-nav-item"><a href="/guide/estimator" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/estimator" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/estimator" ><span class="devsite-nav-text" tooltip>Estimator overview</span></a></li> <li class="devsite-nav-item devsite-nav-heading"><div 
class="devsite-nav-title devsite-nav-title-no-path"> <span class="devsite-nav-text" tooltip>Appendix</span> </div></li> <li class="devsite-nav-item"><a href="/guide/versions" class="devsite-nav-title gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Book nav link, pathname: /guide/versions" track-type="bookNav" track-name="click" track-metadata-eventdetail="/guide/versions" ><span class="devsite-nav-text" tooltip>Version compatibility</span></a></li> </ul> <ul class="devsite-nav-list" menu="Learn" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/learn" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Introduction" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Introduction </span> </a> </li> <li class="devsite-nav-item"> <a href="/tutorials" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tutorials" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tutorials </span> </a> </li> <li class="devsite-nav-item"> <a href="/guide" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Guide" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Guide </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/learn-ml" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Learn ML" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Learn ML </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="API" aria-label="Side menu" hidden> <li 
class="devsite-nav-item"> <a href="/api/stable" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow (v2.16.1)" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow (v2.16.1) </span> </a> </li> <li class="devsite-nav-item"> <a href="/versions" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Versions…" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Versions… </span> </a> </li> <li class="devsite-nav-item"> <a href="https://js.tensorflow.org/api/latest/" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow.js" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow.js </span> </a> </li> <li class="devsite-nav-item"> <a href="/lite/api_docs" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow Lite" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow Lite </span> </a> </li> <li class="devsite-nav-item"> <a href="/tfx/api_docs" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TFX" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TFX </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Ecosystem" aria-label="Side menu" hidden> <li class="devsite-nav-item devsite-nav-heading"> <span class="devsite-nav-title" tooltip > <span class="devsite-nav-text" tooltip > LIBRARIES </span> </span> </li> 
<li class="devsite-nav-item"> <a href="/js" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow.js" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow.js </span> </a> </li> <li class="devsite-nav-item"> <a href="/lite" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TensorFlow Lite" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TensorFlow Lite </span> </a> </li> <li class="devsite-nav-item"> <a href="/tfx" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: TFX" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > TFX </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/libraries-extensions" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: All libraries" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > All libraries </span> </a> </li> <li class="devsite-nav-item devsite-nav-heading"> <span class="devsite-nav-title" tooltip > <span class="devsite-nav-text" tooltip > RESOURCES </span> </span> </li> <li class="devsite-nav-item"> <a href="/resources/models-datasets" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Models & datasets" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Models & datasets </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/tools" class="devsite-nav-title 
gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Tools" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Tools </span> </a> </li> <li class="devsite-nav-item"> <a href="/responsible_ai" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Responsible AI" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Responsible AI </span> </a> </li> <li class="devsite-nav-item"> <a href="/resources/recommendation-systems" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Recommendation systems" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Recommendation systems </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Community" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/community/groups" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Groups" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Groups </span> </a> </li> <li class="devsite-nav-item"> <a href="/community/contribute" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Contribute" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Contribute </span> </a> </li> <li class="devsite-nav-item"> <a href="https://blog.tensorflow.org/" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Blog" track-type="navMenu" 
track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Blog </span> </a> </li> <li class="devsite-nav-item"> <a href="https://discuss.tensorflow.org" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Forum" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Forum </span> </a> </li> </ul> <ul class="devsite-nav-list" menu="Why TensorFlow" aria-label="Side menu" hidden> <li class="devsite-nav-item"> <a href="/about" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: About" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > About </span> </a> </li> <li class="devsite-nav-item"> <a href="/about/case-studies" class="devsite-nav-title gc-analytics-event " data-category="Site-Wide Custom Events" data-label="Responsive Tab: Case studies" track-type="navMenu" track-metadata-eventDetail="globalMenu" track-metadata-position="nav"> <span class="devsite-nav-text" tooltip > Case studies </span> </a> </li> </ul> </div> </div> </nav> </devsite-book-nav> <section id="gc-wrapper"> <main role="main" id="main-content" class="devsite-main-content" has-book-nav has-sidebar > <div class="devsite-sidebar"> <div class="devsite-sidebar-content"> <devsite-toc class="devsite-nav" role="navigation" aria-label="On this page" depth="2" scrollbars ></devsite-toc> <devsite-recommendations-sidebar class="nocontent devsite-nav"> </devsite-recommendations-sidebar> </div> </div> <devsite-content> <article class="devsite-article"><style> /* Styles inlined from /site-assets/css/style.css */ /* override theme */ table img { max-width: 100%; } /* .devsite-terminal virtualenv prompt */ .tfo-terminal-venv::before { content: "(venv) $ " !important; } /* 
.devsite-terminal root prompt */ .tfo-terminal-root::before { content: "# " !important; } /* Used in links for type annotations in function/method signatures */ .tfo-signature-link a, .tfo-signature-link a:visited, .tfo-signature-link a:hover, .tfo-signature-link a:focus, .tfo-signature-link a:hover *, .tfo-signature-link a:focus * { text-decoration: none !important; } .tfo-signature-link a, .tfo-signature-link a:visited { border-bottom: 1px dotted #1a73e8; } .tfo-signature-link a:focus { border-bottom-style: solid; } /* .devsite-terminal Windows prompt */ .tfo-terminal-windows::before { content: "C:\\> " !important; } /* .devsite-terminal Windows prompt w/ virtualenv */ .tfo-terminal-windows-venv::before { content: "(venv) C:\\> " !important; } .tfo-diff-green-one-level + * { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-green + * > * { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-green-list + ul > li:first-of-type { background: rgba(175, 245, 162, .6) !important; } .tfo-diff-red-one-level + * { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } .tfo-diff-red + * > * { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } .tfo-diff-red-list + ul > li:first-of-type { background: rgba(255, 230, 230, .6) !important; text-decoration: line-through !important; } devsite-code .tfo-notebook-code-cell-output { max-height: 300px; overflow: auto; background: rgba(255, 247, 237, 1); /* orange bg to distinguish from input code cells */ } devsite-code .tfo-notebook-code-cell-output + .devsite-code-buttons-container button { background: rgba(255, 247, 237, .7); /* orange bg to distinguish from input code cells */ } devsite-code[dark-code] .tfo-notebook-code-cell-output { background: rgba(64, 78, 103, 1); /* medium slate */ } devsite-code[dark-code] .tfo-notebook-code-cell-output + .devsite-code-buttons-container button { background: rgba(64, 78, 103, .7); /* medium slate 
*/ } /* override default table styles for notebook buttons */ .devsite-table-wrapper .tfo-notebook-buttons { display: inline-block; margin-left: 3px; width: auto; } .tfo-notebook-buttons td { padding-left: 0; padding-right: 20px; } .tfo-notebook-buttons a, .tfo-notebook-buttons :link, .tfo-notebook-buttons :visited { border-radius: 8px; box-shadow: 0 1px 2px 0 rgba(60, 64, 67, .3), 0 1px 3px 1px rgba(60, 64, 67, .15); color: #202124; padding: 12px 17px; transition: box-shadow 0.2s; } .tfo-notebook-buttons a:hover, .tfo-notebook-buttons a:focus { box-shadow: 0 1px 2px 0 rgba(60, 64, 67, .3), 0 2px 6px 2px rgba(60, 64, 67, .15); } .tfo-notebook-buttons tr { background: 0; border: 0; } /* on rendered notebook page, remove link to webpage since we're already here */ .tfo-notebook-buttons:not(.tfo-api) td:first-child { display: none; } .tfo-notebook-buttons td > a { -webkit-box-align: center; -ms-flex-align: center; align-items: center; display: -webkit-box; display: -ms-flexbox; display: flex; } .tfo-notebook-buttons td > a > img { margin-right: 8px; } /* landing pages */ .tfo-landing-row-item-inset-white { background-color: #fff; padding: 32px; } .tfo-landing-row-item-inset-white ol, .tfo-landing-row-item-inset-white ul { padding-left: 20px; } /* colab callout button */ .colab-callout-row devsite-code { border-radius: 8px 8px 0 0; box-shadow: none; } .colab-callout-footer { background: #e3e4e7; border-radius: 0 0 8px 8px; color: #37474f; padding: 20px; } .colab-callout-row devsite-code[dark-code] + .colab-callout-footer { background: #3f4f66; } .colab-callout-footer > .button { margin-top: 4px; color: #ff5c00; } .colab-callout-footer > a > span { vertical-align: middle; color: #37474f; padding-left: 10px; font-size: 14px; } .colab-callout-row devsite-code[dark-code] + .colab-callout-footer > a > span { color: #fff; } a.colab-button { background: rgba(255, 255, 255, .75); border: solid 1px rgba(0, 0, 0, .08); border-bottom-color: rgba(0, 0, 0, .15); border-radius: 4px; 
color: #aaa; display: inline-block; font-size: 11px !important; font-weight: 300; line-height: 16px; padding: 4px 8px; text-decoration: none; text-transform: uppercase; } a.colab-button:hover { background: white; border-color: rgba(0, 0, 0, .2); color: #666; } a.colab-button span { background: url(/images/colab_logo_button.svg) no-repeat 1px 1px / 20px; border-radius: 4px; display: inline-block; padding-left: 24px; text-decoration: none; } @media screen and (max-width: 600px) { .tfo-notebook-buttons td { display: block; } } /* guide and tutorials landing page cards and sections */ .tfo-landing-page-card { padding: 16px; box-shadow: 0 0 36px rgba(0,0,0,0.1); border-radius: 10px; } /* Page section headings */ .tfo-landing-page-heading h2, h2.tfo-landing-page-heading { font-family: "Google Sans", sans-serif; color: #425066; font-size: 30px; font-weight: 700; line-height: 40px; } /* Item title headings */ .tfo-landing-page-heading h3, h3.tfo-landing-page-heading, .tfo-landing-page-card h3, h3.tfo-landing-page-card { font-family: "Google Sans", sans-serif; color: #425066; font-size: 20px; font-weight: 500; line-height: 26px; } /* List of tutorials notebooks for subsites */ .tfo-landing-page-resources-ul { padding-left: 15px } .tfo-landing-page-resources-ul > li { margin: 6px 0; } /* Temporary fix to hide product description in header on landing pages */ devsite-header .devsite-product-description { display: none; } </style> <div class="devsite-article-meta nocontent" role="navigation"> <ul class="devsite-breadcrumb-list" aria-label="Breadcrumb"> <li class="devsite-breadcrumb-item "> <a href="https://www.tensorflow.org/" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="1" track-type="globalNav" track-name="breadcrumb" track-metadata-position="1" track-metadata-eventdetail="TensorFlow" > TensorFlow </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet 
material-icons" aria-hidden="true"></div> <a href="https://www.tensorflow.org/learn" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="2" track-type="globalNav" track-name="breadcrumb" track-metadata-position="2" track-metadata-eventdetail="" > Learn </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet material-icons" aria-hidden="true"></div> <a href="https://www.tensorflow.org/tutorials" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="3" track-type="globalNav" track-name="breadcrumb" track-metadata-position="3" track-metadata-eventdetail="TensorFlow Core" > TensorFlow Core </a> </li> <li class="devsite-breadcrumb-item "> <div class="devsite-breadcrumb-guillemet material-icons" aria-hidden="true"></div> <a href="https://www.tensorflow.org/guide" class="devsite-breadcrumb-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Breadcrumbs" data-value="4" track-type="globalNav" track-name="breadcrumb" track-metadata-position="4" track-metadata-eventdetail="" > Guide </a> </li> </ul> <devsite-thumb-rating position="header"> </devsite-thumb-rating> </div> <h1 class="devsite-page-title" tabindex="-1"> tf.data: Build TensorFlow input pipelines </h1> <devsite-feature-tooltip ack-key="AckCollectionsBookmarkTooltipDismiss" analytics-category="Site-Wide Custom Events" analytics-action-show="Callout Profile displayed" analytics-action-close="Callout Profile dismissed" analytics-label="Create Collection Callout" class="devsite-page-bookmark-tooltip nocontent" dismiss-button="true" id="devsite-collections-dropdown" dismiss-button-text="Dismiss" close-button-text="Got it"> <devsite-bookmark></devsite-bookmark> <span slot="popout-heading"> Stay organized with collections </span> <span slot="popout-contents"> Save and categorize content based on your preferences. 
</span> </devsite-feature-tooltip> <div class="devsite-page-title-meta"><devsite-view-release-notes></devsite-view-release-notes></div> <devsite-toc class="devsite-nav" depth="2" devsite-toc-embedded > </devsite-toc> <div class="devsite-article-body clearfix "> <p></p> <!-- DO NOT EDIT! Automatically generated file. --> <div itemscope itemtype="http://developers.google.com/ReferenceObject"> <meta itemprop="name" content="tf.data: Build TensorFlow input pipelines" /> <meta itemprop="path" content="Guide & Tutorials" /> <meta itemprop="property" content="tf.Variable"/> <meta itemprop="property" content="tf.cast"/> <meta itemprop="property" content="tf.data.Dataset"/> <meta itemprop="property" content="tf.data.TFRecordDataset"/> <meta itemprop="property" content="tf.data.TextLineDataset"/> <meta itemprop="property" content="tf.data.experimental.Counter"/> <meta itemprop="property" content="tf.data.experimental.CsvDataset"/> <meta itemprop="property" content="tf.data.experimental.make_csv_dataset"/> <meta itemprop="property" content="tf.fill"/> <meta itemprop="property" content="tf.image.convert_image_dtype"/> <meta itemprop="property" content="tf.image.resize"/> <meta itemprop="property" content="tf.io.FixedLenFeature"/> <meta itemprop="property" content="tf.io.decode_jpeg"/> <meta itemprop="property" content="tf.io.decode_png"/> <meta itemprop="property" content="tf.io.parse_example"/> <meta itemprop="property" content="tf.io.read_file"/> <meta itemprop="property" content="tf.keras.Sequential"/> <meta itemprop="property" content="tf.keras.datasets.fashion_mnist.load_data"/> <meta itemprop="property" content="tf.keras.layers.Dense"/> <meta itemprop="property" content="tf.keras.layers.Flatten"/> <meta itemprop="property" content="tf.keras.losses.SparseCategoricalCrossentropy"/> <meta itemprop="property" content="tf.keras.preprocessing.image.ImageDataGenerator"/> <meta itemprop="property" content="tf.keras.utils.get_file"/> <meta itemprop="property" 
content="tf.math.not_equal"/> <meta itemprop="property" content="tf.math.reduce_sum"/> <meta itemprop="property" content="tf.py_function"/> <meta itemprop="property" content="tf.random.uniform"/> <meta itemprop="property" content="tf.sparse.SparseTensor"/> <meta itemprop="property" content="tf.stack"/> <meta itemprop="property" content="tf.strings.split"/> <meta itemprop="property" content="tf.strings.substr"/> <meta itemprop="property" content="tf.train.Checkpoint"/> <meta itemprop="property" content="tf.train.CheckpointManager"/> <meta itemprop="property" content="tf.train.Example"/> </div> <table class="tfo-notebook-buttons" align="left"> <tr> <td> <a target="_blank" href="https://www.tensorflow.org/guide/data"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" alt="">View on TensorFlow.org</a> </td> <td> <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" alt="">Run in Google Colab</a> </td> <td> <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" alt="">View source on GitHub</a> </td> <td> <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/guide/data.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" alt="">Download notebook</a> </td> </tr> </table> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API enables you to build complex input pipelines from simple, reusable pieces. For example, the pipeline for an image model might aggregate data from files in a distributed file system, apply random perturbations to each image, and merge randomly selected images into a batch for training. 
The pipeline for a text model might involve extracting symbols from raw text data, converting them to embedding identifiers with a lookup table, and batching together sequences of different lengths. The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API makes it possible to handle large amounts of data, read from different data formats, and perform complex transformations.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API introduces a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> abstraction that represents a sequence of elements, in which each element consists of one or more components. For example, in an image pipeline, an element might be a single training example, with a pair of tensor components representing the image and its label.</p> <p>There are two distinct ways to create a dataset:</p> <ul> <li><p>A data <strong>source</strong> constructs a <code translate="no" dir="ltr">Dataset</code> from data stored in memory or in one or more files.</p></li> <li><p>A data <strong>transformation</strong> constructs a dataset from one or more <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> objects.</p></li> </ul> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">tensorflow</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-k">as</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">tf</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 
2024-08-15 01:37:36.963860: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-08-15 01:37:36.985171: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-08-15 01:37:36.991452: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">pathlib</span> <span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">os</span> <span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">matplotlib.pyplot</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-k">as</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">plt</span> <span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">pandas</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-k">as</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">pd</span> <span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">numpy</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-k">as</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">np</span> <span class="devsite-syntax-n">np</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">set_printoptions</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">precision</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <h2 id="basic_mechanics" data-text="Basic mechanics" tabindex="-1">Basic mechanics</h2> <p><a id="basic-mechanics"></a></p> <p>To create an input pipeline, you must start with a data <em>source</em>. For example, to construct a <code translate="no" dir="ltr">Dataset</code> from data in memory, you can use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors"><code translate="no" dir="ltr">tf.data.Dataset.from_tensors()</code></a> or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">tf.data.Dataset.from_tensor_slices()</code></a>. Alternatively, if your input data is stored in a file in the recommended TFRecord format, you can use <a href="https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset"><code translate="no" dir="ltr">tf.data.TFRecordDataset()</code></a>.</p> <p>Once you have a <code translate="no" dir="ltr">Dataset</code> object, you can <em>transform</em> it into a new <code translate="no" dir="ltr">Dataset</code> by chaining method calls on the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> object. For example, you can apply per-element transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a>, and multi-element transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a>. 
Refer to the documentation for <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> for a complete list of transformations.</p> <p>The <code translate="no" dir="ltr">Dataset</code> object is a Python iterable. This makes it possible to consume its elements using a for loop:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">8</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">8</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> WARNING: All log messages before absl::InitializeLog() is called are written to STDERR I0000 00:00:1723685859.835217 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.839003 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.842691 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.846561 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.858030 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.861635 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.865105 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.868512 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.871403 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.874859 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.878307 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685859.881840 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.098140 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.100277 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.102280 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.104281 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.106309 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.108307 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.110218 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.112117 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.114046 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.116014 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.117904 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.119808 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.158075 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.160123 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.162060 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.163993 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.165963 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.167940 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.169863 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.171778 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.173638 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.176135 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.178420 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 I0000 00:00:1723685861.180782 44933 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)> </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">elem</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">elem</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 8 3 0 8 2 1 </pre></devsite-code> <p>Or by explicitly creating a Python iterator using <code translate="no" dir="ltr">iter</code> and consuming its elements using <code translate="no" dir="ltr">next</code>:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">it</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">it</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre 
class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 8 </pre></devsite-code> <p>Alternatively, dataset elements can be consumed using the <code translate="no" dir="ltr">reduce</code> transformation, which reduces all elements to produce a single result. The following example illustrates how to use the <code translate="no" dir="ltr">reduce</code> transformation to compute the sum of a dataset of integers.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">reduce</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">state</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">state</span> <span class="devsite-syntax-o">+</span> <span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 22 </pre></devsite-code> <!-- TODO(jsimsa): Talk about <a href="https://www.tensorflow.org/api_docs/python/tf/function"><code translate="no" dir="ltr">tf.function</code></a> support. --> <p><a id="dataset_structure"></a></p> <h3 id="dataset_structure" data-text="Dataset structure" tabindex="-1">Dataset structure</h3> <p>A dataset produces a sequence of <em>elements</em>, where each element is the same (nested) structure of <em>components</em>. 
Individual components of the structure can be of any type representable by <a href="https://www.tensorflow.org/api_docs/python/tf/TypeSpec"><code translate="no" dir="ltr">tf.TypeSpec</code></a>, including <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor"><code translate="no" dir="ltr">tf.sparse.SparseTensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/RaggedTensor"><code translate="no" dir="ltr">tf.RaggedTensor</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/TensorArray"><code translate="no" dir="ltr">tf.TensorArray</code></a>, or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>The Python constructs that can be used to express the (nested) structure of elements include <code translate="no" dir="ltr">tuple</code>, <code translate="no" dir="ltr">dict</code>, <code translate="no" dir="ltr">NamedTuple</code>, and <code translate="no" dir="ltr">OrderedDict</code>. In particular, <code translate="no" dir="ltr">list</code> is not a valid construct for expressing the structure of dataset elements. This is because early <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> users felt strongly about <code translate="no" dir="ltr">list</code> inputs (for example, when passed to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors"><code translate="no" dir="ltr">tf.data.Dataset.from_tensors</code></a>) being automatically packed as tensors and <code translate="no" dir="ltr">list</code> outputs (for example, return values of user-defined functions) being coerced into a <code translate="no" dir="ltr">tuple</code>. 
As a consequence, if you would like a <code translate="no" dir="ltr">list</code> input to be treated as a structure, you need to convert it into <code translate="no" dir="ltr">tuple</code> and if you would like a <code translate="no" dir="ltr">list</code> output to be a single component, then you need to explicitly pack it using <a href="https://www.tensorflow.org/api_docs/python/tf/stack"><code translate="no" dir="ltr">tf.stack</code></a>.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#element_spec"><code translate="no" dir="ltr">Dataset.element_spec</code></a> property allows you to inspect the type of each element component. The property returns a <em>nested structure</em> of <a href="https://www.tensorflow.org/api_docs/python/tf/TypeSpec"><code translate="no" dir="ltr">tf.TypeSpec</code></a> objects, matching the structure of the element, which may be a single component, a tuple of components, or a nested tuple of components. For example:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset1</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">]))</span> <span 
class="devsite-syntax-n">dataset1</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> TensorSpec(shape=(10,), dtype=tf.float32, name=None) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset2</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">]),</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">maxval</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)))</span> <span class="devsite-syntax-n">dataset2</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset3</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">dataset1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dataset2</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset3</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> (TensorSpec(shape=(10,), dtype=tf.float32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Dataset containing a sparse tensor.</span> <span class="devsite-syntax-n">dataset4</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensors</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">SparseTensor</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">indices</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[[</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">]],</span> <span class="devsite-syntax-n">values</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">dense_shape</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">]))</span> <span class="devsite-syntax-n">dataset4</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> SparseTensorSpec(TensorShape([3, 4]), tf.int32) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" 
translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Use value_type to see the type of value represented by the element spec</span> <span class="devsite-syntax-n">dataset4</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">value_type</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> tensorflow.python.framework.sparse_tensor.SparseTensor </pre></devsite-code> <p>The <code translate="no" dir="ltr">Dataset</code> transformations support datasets of any structure. When using the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> transformations, which apply a function to each element, the element structure determines the arguments of the function:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset1</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span 
class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">minval</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">maxval</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset1</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_TensorSliceDataset element_spec=TensorSpec(shape=(10,), dtype=tf.int32, name=None)> </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">z</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset1</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">z</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [3 4 1 6 1 8 5 8 9 4] [2 7 6 9 2 6 6 4 9 7] [8 7 9 6 3 4 5 8 4 4] [2 1 1 1 3 9 7 8 6 8] </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" 
dir="ltr"><span class="devsite-syntax-n">dataset2</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">]),</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">maxval</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)))</span> <span class="devsite-syntax-n">dataset2</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))> </pre></devsite-code> <div></div><devsite-code><pre 
class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset3</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">dataset1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dataset2</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset3</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_ZipDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.int32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)))> </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">a</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">b</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-n">c</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset3</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'shapes: </span><span class="devsite-syntax-si">{a.shape}</span><span class="devsite-syntax-s1">, </span><span class="devsite-syntax-si">{b.shape}</span><span class="devsite-syntax-s1">, </span><span 
class="devsite-syntax-si">{c.shape}</span><span class="devsite-syntax-s1">'</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">a</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">a</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">b</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">b</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">c</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">c</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> shapes: (10,), (), (100,) shapes: (10,), (), (100,) shapes: (10,), (), (100,) shapes: (10,), (), (100,) </pre></devsite-code> <h2 id="reading_input_data" data-text="Reading input data" tabindex="-1">Reading input data</h2> <h3 id="consuming_numpy_arrays" data-text="Consuming NumPy arrays" tabindex="-1">Consuming NumPy arrays</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/numpy">Loading NumPy arrays</a> tutorial for more examples.</p> <p>If all of your input data fits in memory, the simplest way to create a <code translate="no" dir="ltr">Dataset</code> from them is to convert them to <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects and use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">Dataset.from_tensor_slices</code></a>.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">train</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">test</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">datasets</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">fashion_mnist</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">load_data</span><span class="devsite-syntax-p">()</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz 29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz 26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz 5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz 4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">train</span> <span class="devsite-syntax-n">images</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-mi">255</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_TensorSliceDataset element_spec=(TensorSpec(shape=(28, 28), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.uint8, name=None))> </pre></devsite-code> <aside class="note"><strong>Note:</strong><span> The above code snippet will embed the <code translate="no" dir="ltr">images</code> and <code translate="no" dir="ltr">labels</code> arrays in your TensorFlow graph as <a href="https://www.tensorflow.org/api_docs/python/tf/constant"><code translate="no" dir="ltr">tf.constant()</code></a> operations. This works well for a small dataset, but wastes memory—because the contents of the array will be copied multiple times—and can run into the 2GB limit for the <code translate="no" dir="ltr">tf.GraphDef</code> protocol buffer.</span></aside> <h3 id="consuming_python_generators" data-text="Consuming Python generators" tabindex="-1">Consuming Python generators</h3> <p>Another common data source that can easily be ingested as a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> is the Python generator.</p> <aside class="caution"><strong>Caution:</strong><span> While this is a convenient approach, it has limited portability and scalability. 
It must run in the same Python process that created the generator, and is still subject to the Python <a href="https://en.wikipedia.org/wiki/Global_interpreter_lock">GIL</a>.</span></aside><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">count</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">stop</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">0</span> <span class="devsite-syntax-k">while</span> <span class="devsite-syntax-n">i&lt;stop</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-k">yield</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-o">+=</span> <span class="devsite-syntax-mi">1</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">n</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">count</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">n</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 0 1 2 3 4 </pre></devsite-code> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator"><code translate="no" dir="ltr">Dataset.from_generator</code></a> constructor converts 
the python generator to a fully functional <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>The constructor takes a callable as input, not an iterator. This allows it to restart the generator when it reaches the end. It takes an optional <code translate="no" dir="ltr">args</code> argument, which is passed as the callable's arguments.</p> <p>The <code translate="no" dir="ltr">output_types</code> argument is required because <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> builds a <a href="https://www.tensorflow.org/api_docs/python/tf/Graph"><code translate="no" dir="ltr">tf.Graph</code></a> internally, and graph edges require a <code translate="no" dir="ltr">tf.dtype</code>.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">ds_counter</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_generator</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">count</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">args</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">25</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">output_types</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">output_shapes</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">count_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">ds_counter</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">()</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">count_batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 0 1 2 3 4] [ 5 6 7 8 9 10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 0 1 2 3 4] [ 5 6 7 8 9 10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] </pre></devsite-code> <p>The <code translate="no" dir="ltr">output_shapes</code> argument is not <em>required</em> but is highly recommended as many TensorFlow operations do not support tensors with an unknown rank. 
If the length of a particular axis is unknown or variable, set it as <code translate="no" dir="ltr">None</code> in the <code translate="no" dir="ltr">output_shapes</code>.</p> <p>It's also important to note that the <code translate="no" dir="ltr">output_shapes</code> and <code translate="no" dir="ltr">output_types</code> follow the same nesting rules as other dataset methods.</p> <p>Here is an example generator that demonstrates both aspects: it returns tuples of arrays, where the second array is a vector with unknown length.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">gen_series</span><span class="devsite-syntax-p">():</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">0</span> <span class="devsite-syntax-k">while</span> <span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">size</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">randint</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">yield</span> <span class="devsite-syntax-n">i</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">normal</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-n">size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">size</span><span class="devsite-syntax-p">,))</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-o">+=</span> <span class="devsite-syntax-mi">1</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">i</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">series</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">gen_series</span><span class="devsite-syntax-p">():</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">i</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">":"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-nb">str</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">series</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-k">if</span> <span class="devsite-syntax-n">i</span> &gt; <span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-k">break</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 0 : [1.1274] 1 : [-0.5822 0.8497 -1.3594 0.2083 -0.3007 1.2171 -0.3551] 2 : [-1.2016 -0.1085 0.4088 0.0801 1.4901 -2.3102] 3 : [ 0.5816 -0.6447 -0.9673 0.5282 0.52 -0.2634 0.3001 0.8753] 4 : [ 0.0888 0.071 1.26 -0.347 -0.2643 -1.0757 0.4192] 5 : [ 0.4911 0.8377 0.3576 -0.0351 0.9663] 6 : [-0.1996 0.5808 0.4589 1.8229 -0.5712] </pre></devsite-code> <p>The first output is an <code translate="no" dir="ltr">int32</code>; the second is a <code translate="no" 
dir="ltr">float32</code>.</p> <p>The first item is a scalar, shape <code translate="no" dir="ltr">()</code>, and the second is a vector of unknown length, shape <code translate="no" dir="ltr">(None,)</code>.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">ds_series</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_generator</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">gen_series</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">output_types</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">output_shapes</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">((),</span> <span class="devsite-syntax-p">(</span><span class="devsite-syntax-kc">None</span><span class="devsite-syntax-p">,)))</span> <span class="devsite-syntax-n">ds_series</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> &lt;_FlatMapDataset element_spec=(TensorSpec(shape=(), dtype=tf.int32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))&gt; </pre></devsite-code> <p>Now it can be used like a regular <a 
href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>. Note that when batching a dataset with a variable shape, you need to use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a>.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">ds_series_batch</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">ds_series</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shuffle</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">padded_batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">ids</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">sequence_batch</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">ds_series_batch</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">ids</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sequence_batch</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [ 5 19 20 11 4 10 17 8 27 18] [[-0.7479 0.867 -0.0558 -1.0825 -0.4113 0.0312 0. ] [-1.0498 -0.3941 0. 0. 0. 0. 0. ] [-0.2709 0.0236 0.0746 0.3704 0. 0. 0. ] [ 1.6525 -0.861 0.5642 0.9961 0.7463 0. 0. ] [ 0.4122 -0.118 1.5491 1.9578 0. 0. 0. ] [-1.6237 1.3636 -0.2079 0. 0. 0. 0. ] [ 0. 0. 0. 0. 0. 0. 0. ] [ 0. 0. 0. 0. 0. 0. 0. ] [-1.3268 0.9881 0.531 0. 0. 0. 0. ] [ 0.0284 -1.4974 -0.545 -1.2795 0.7032 1.4058 0.1412]] </pre></devsite-code> <p>For a more realistic example, try wrapping <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator"><code translate="no" dir="ltr">preprocessing.image.ImageDataGenerator</code></a> as a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a>.</p> <p>First download the data:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">flowers</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-s1">'flower_photos'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">untar</span><span class="devsite-syntax-o">=</span><span 
class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz 228813984/228813984 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step </pre></devsite-code> <p>Create the <a href="https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator"><code translate="no" dir="ltr">image.ImageDataGenerator</code></a>:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">img_gen</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">preprocessing</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">ImageDataGenerator</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">rescale</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mf">1.</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-mi">255</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">rotation_range</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> 
<span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">img_gen</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">flow_from_directory</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">flowers</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Found 3670 images belonging to 5 classes. </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">images</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> float32 (32, 256, 256, 3) float32 (32, 5) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_generator</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-k">lambda</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">img_gen</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">flow_from_directory</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">flowers</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">output_types</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">output_shapes</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-mi">32</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">256</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">256</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">32</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">element_spec</span> </code></pre></devsite-code> <div></div><devsite-code><pre 
class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> (TensorSpec(shape=(32, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(32, 5), dtype=tf.float32, name=None)) </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'images.shape: '</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'labels.shape: '</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Found 3670 images belonging to 5 classes. 
images.shape: (32, 256, 256, 3) labels.shape: (32, 5) </pre></devsite-code> <h3 id="consuming_tfrecord_data" data-text="Consuming TFRecord data" tabindex="-1">Consuming TFRecord data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/tfrecord">Loading TFRecords</a> tutorial for an end-to-end example.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API supports a variety of file formats so that you can process large datasets that do not fit in memory. For example, the TFRecord file format is a simple record-oriented binary format that many TensorFlow applications use for training data. The <a href="https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset"><code translate="no" dir="ltr">tf.data.TFRecordDataset</code></a> class enables you to stream over the contents of one or more TFRecord files as part of an input pipeline.</p> <p>Here is an example using the test file from the French Street Name Signs (FSNS).</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Creates a dataset that reads all of the examples from two files.</span> <span class="devsite-syntax-n">fsns_test_file</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"fsns.tfrec"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">"https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001"</span><span class="devsite-syntax-p">)</span> 
</code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001 7904079/7904079 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre></devsite-code> <p>The <code translate="no" dir="ltr">filenames</code> argument to the <code translate="no" dir="ltr">TFRecordDataset</code> initializer can be a string, a list of strings, or a <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> of strings. Therefore, if you have two sets of files for training and validation purposes, you can create a factory method that produces the dataset, taking filenames as an input argument:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TFRecordDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">filenames</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">fsns_test_file</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> &lt;TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)&gt; </pre></devsite-code> <p>Many TensorFlow projects use serialized <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> records in their 
TFRecord files. These need to be decoded before they can be inspected:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">raw_example</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">parsed</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">train</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">FromString</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">raw_example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-n">parsed</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">feature</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'image/text'</span><span class="devsite-syntax-p">]</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> bytes_list { value: "Rue Perreyon" } </pre></devsite-code> <h3 id="consuming_text_data" data-text="Consuming text data" tabindex="-1">Consuming text data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/text">Load text</a> tutorial for an end-to-end example.</p> <p>Many datasets are distributed as one or more text files. 
The <a href="https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset"><code translate="no" dir="ltr">tf.data.TextLineDataset</code></a> provides an easy way to extract lines from one or more text files. Given one or more filenames, a <code translate="no" dir="ltr">TextLineDataset</code> will produce one string-valued element per line of those files.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">directory_url</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-s1">'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'</span> <span class="devsite-syntax-n">file_names</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'cowper.txt'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'derby.txt'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'butler.txt'</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">file_paths</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_name</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">directory_url</span> <span class="devsite-syntax-o">+</span> <span class="devsite-syntax-n">file_name</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">file_name</span> <span class="devsite-syntax-ow">in</span> <span 
class="devsite-syntax-n">file_names</span> <span class="devsite-syntax-p">]</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt 815980/815980 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt 809730/809730 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt 807992/807992 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TextLineDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_paths</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p>Here are the first few lines of the first file:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;" b'His wrath pernicious, who ten thousand woes' b"Caused to Achaia's host, sent many a soul" b'Illustrious into Ades premature,' b'And Heroes gave (so stood the will of Jove)' </pre></devsite-code> <p>To alternate lines between files use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#interleave"><code translate="no" dir="ltr">Dataset.interleave</code></a>. This makes it easier to shuffle files together. Here are the first, second and third lines from each translation:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">files_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_paths</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">lines_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">files_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">interleave</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TextLineDataset</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">cycle_length</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">i</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-nb">enumerate</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">lines_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">9</span><span class="devsite-syntax-p">)):</span> <span class="devsite-syntax-k">if</span> <span class="devsite-syntax-n">i</span> <span class="devsite-syntax-o">%</span> <span class="devsite-syntax-mi">3</span> <span class="devsite-syntax-o">==</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;" b"\xef\xbb\xbfOf Peleus' son, Achilles, sing, O Muse," b'\xef\xbb\xbfSing, O goddess, the anger of Achilles son of Peleus, that brought' b'His wrath pernicious, who ten thousand woes' b'The vengeance, deep and deadly; whence to Greece' b'countless ills upon the Achaeans. 
Many a brave soul did it send' b"Caused to Achaia's host, sent many a soul" b'Unnumbered ills arose; which many a soul' b'hurrying down to Hades, and many a hero did it yield a prey to dogs and' </pre></devsite-code> <p>By default, a <code translate="no" dir="ltr">TextLineDataset</code> yields <em>every</em> line of each file, which may not be desirable, for example, if the file starts with a header line, or contains comments. These lines can be removed using the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#skip"><code translate="no" dir="ltr">Dataset.skip()</code></a> or <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> transformations. Here, you skip the first line, then filter to find only survivors.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_file</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"train.csv"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">"https://storage.googleapis.com/tf-datasets/titanic/train.csv"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">titanic_lines</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TextLineDataset</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv 30874/30874 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">titanic_lines</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b'survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone' b'0,male,22.0,1,0,7.25,Third,unknown,Southampton,n' b'1,female,38.0,1,0,71.2833,First,C,Cherbourg,n' b'1,female,26.0,0,0,7.925,Third,unknown,Southampton,y' b'1,female,35.0,1,0,53.1,First,C,Southampton,n' b'0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y' b'0,male,2.0,3,1,21.075,Third,unknown,Southampton,n' b'1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n' b'1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n' b'1,female,4.0,1,1,16.7,Third,G,Southampton,n' </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span 
class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">survived</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">not_equal</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">strings</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">substr</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-s2">"0"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">survivors</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">titanic_lines</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">skip</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">filter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">survived</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">survivors</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b'1,female,38.0,1,0,71.2833,First,C,Cherbourg,n' b'1,female,26.0,0,0,7.925,Third,unknown,Southampton,y' b'1,female,35.0,1,0,53.1,First,C,Southampton,n' b'1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n' b'1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n' b'1,female,4.0,1,1,16.7,Third,G,Southampton,n' b'1,male,28.0,0,0,13.0,Second,unknown,Southampton,y' b'1,female,28.0,0,0,7.225,Third,unknown,Cherbourg,y' b'1,male,28.0,0,0,35.5,First,A,Southampton,y' b'1,female,38.0,1,5,31.3875,Third,unknown,Southampton,n' </pre></devsite-code> <h3 id="consuming_csv_data" data-text="Consuming CSV data" tabindex="-1">Consuming CSV data</h3> <p>Refer to the <a href="https://www.tensorflow.org/tutorials/load_data/csv">Loading CSV Files</a> and <a href="https://www.tensorflow.org/tutorials/load_data/pandas_dataframe">Loading Pandas DataFrames</a> tutorials for more examples.</p> <p>The CSV file format is a popular format for storing tabular data in plain text.</p> <p>For example:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_file</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"train.csv"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">"https://storage.googleapis.com/tf-datasets/titanic/train.csv"</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">df</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">pd</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">read_csv</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">df</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">head</span><span class="devsite-syntax-p">()</span> </code></pre></devsite-code> <devsite-iframe><iframe src="https://www.tensorflow.org/frame/guide/data_a9615c0f2fcea96ba5758f00e5decfd7a95fb2936ed60626ecd0888cf9e08c89.frame" class="framebox inherit-locale " title="Output of df.head() for the Titanic training data" allow="clipboard-write https://tensorflow-dot-devsite-v2-prod-3p.appspot.com" allowfullscreen is-upgraded></iframe></devsite-iframe> <p>If your data fits in memory, the same <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices"><code translate="no" dir="ltr">Dataset.from_tensor_slices</code></a> method works on dictionaries, allowing this data to be easily imported:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_slices</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">dict</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">df</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">feature_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">titanic_slices</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">feature_batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">():</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">" </span><span class="devsite-syntax-si">{!r:20s}</span><span class="devsite-syntax-s2">: </span><span class="devsite-syntax-si">{}</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 'survived' : 0 'sex' : b'male' 'age' : 22.0 'n_siblings_spouses': 1 'parch' : 0 'fare' : 7.25 'class' : b'Third' 'deck' : b'unknown' 
'embark_town' : b'Southampton' 'alone' : b'n' </pre></devsite-code> <p>A more scalable approach is to load from disk as necessary.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> module provides methods to extract records from one or more CSV files that comply with <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset"><code translate="no" dir="ltr">tf.data.experimental.make_csv_dataset</code></a> function is the high-level interface for reading sets of CSV files. It supports column type inference and many other features, like batching and shuffling, to make usage simple.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_batches</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">make_csv_dataset</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">batch_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_name</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"survived"</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span 
class="devsite-syntax-n">feature_batch</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">titanic_batches</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"'survived': </span><span class="devsite-syntax-si">{}</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label_batch</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"features:"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">feature_batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">():</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">" </span><span class="devsite-syntax-si">{!r:20s}</span><span class="devsite-syntax-s2">: </span><span class="devsite-syntax-si">{}</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">))</span> 
</code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 'survived': [0 0 0 0] features: 'sex' : [b'male' b'male' b'male' b'male'] 'age' : [28. 46. 28. 26.] 'n_siblings_spouses': [0 1 0 0] 'parch' : [1 0 0 0] 'fare' : [33. 61.175 8.05 7.8875] 'class' : [b'Second' b'First' b'Third' b'Third'] 'deck' : [b'unknown' b'E' b'unknown' b'unknown'] 'embark_town' : [b'Southampton' b'Southampton' b'Southampton' b'Southampton'] 'alone' : [b'n' b'n' b'y' b'y'] </pre></devsite-code> <p>You can use the <code translate="no" dir="ltr">select_columns</code> argument if you only need a subset of columns.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_batches</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">make_csv_dataset</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">batch_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_name</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"survived"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">select_columns</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'class'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'fare'</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-s1">'survived'</span><span class="devsite-syntax-p">])</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">feature_batch</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">titanic_batches</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"'survived': </span><span class="devsite-syntax-si">{}</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label_batch</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">feature_batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">():</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">" </span><span class="devsite-syntax-si">{!r:20s}</span><span class="devsite-syntax-s2">: </span><span class="devsite-syntax-si">{}</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">format</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-n">key</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 'survived': [1 1 0 0] 'fare' : [10.5 35.5 12.875 29.125] 'class' : [b'Second' b'First' b'Second' b'Third'] </pre></devsite-code> <p>There is also a lower-level <a href="https://www.tensorflow.org/api_docs/python/tf/data/experimental/CsvDataset"><code translate="no" dir="ltr">experimental.CsvDataset</code></a> class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_types</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">,</span> 
<span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">CsvDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">titanic_types</span> <span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">header</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-n">item</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">item</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">line</span><span class="devsite-syntax-p">])</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0, b'male', 22.0, 1, 0, 7.25, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 38.0, 1, 0, 71.2833, b'First', b'C', b'Cherbourg', b'n'] [1, b'female', 26.0, 0, 0, 7.925, b'Third', b'unknown', b'Southampton', b'y'] [1, b'female', 35.0, 1, 0, 53.1, b'First', b'C', b'Southampton', b'n'] [0, b'male', 28.0, 0, 0, 8.4583, b'Third', b'unknown', b'Queenstown', b'y'] [0, b'male', 2.0, 3, 1, 21.075, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 27.0, 0, 2, 11.1333, b'Third', b'unknown', b'Southampton', b'n'] [1, b'female', 14.0, 1, 0, 30.0708, b'Second', b'unknown', b'Cherbourg', b'n'] [1, b'female', 4.0, 1, 1, 16.7, b'Third', b'G', b'Southampton', b'n'] [0, b'male', 20.0, 0, 0, 8.05, b'Third', b'unknown', b'Southampton', b'y'] </pre></devsite-code> <p>If some columns are empty, this low-level interface allows you to provide default values instead of column types.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-o">%%</span><span class="devsite-syntax-n">writefile</span> <span class="devsite-syntax-n">missing</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">csv</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">4</span> <span class="devsite-syntax-p">,</span><span 
class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">4</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,,</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">4</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,,</span><span class="devsite-syntax-mi">4</span> <span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-p">,,,</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Writing missing.csv </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Creates a dataset that reads all of the records from one CSV file with</span> <span class="devsite-syntax-c1"># four integer columns which may have missing values.</span> <span class="devsite-syntax-n">record_defaults</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">CsvDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"missing.csv"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">record_defaults</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-o">*</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">stack</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> &lt;_MapDataset element_spec=TensorSpec(shape=(4,), dtype=tf.int32, name=None)&gt; </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span 
class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [1 2 3 4] [999 2 3 4] [ 1 999 3 4] [ 1 2 999 4] [ 1 2 3 999] [999 999 999 999] </pre></devsite-code> <p>By default, a <code translate="no" dir="ltr">CsvDataset</code> yields <em>every</em> column of <em>every</em> line of the file, which may not be desirable, for example if the file starts with a header line that should be ignored, or if some columns are not required in the input. These lines and fields can be removed with the <code translate="no" dir="ltr">header</code> and <code translate="no" dir="ltr">select_cols</code> arguments respectively.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Creates a dataset that reads all of the records from one CSV file,</span> <span class="devsite-syntax-c1"># extracting integer data from columns 2 and 4.</span> <span class="devsite-syntax-n">record_defaults</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">999</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-c1"># Only provide defaults for the selected columns</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">CsvDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"missing.csv"</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">record_defaults</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">select_cols</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-o">*</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">stack</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">items</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_MapDataset element_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None)> </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">line</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">line</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre 
class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [2 4] [2 4] [999 4] [2 4] [ 2 999] [999 999] </pre></devsite-code> <h3 id="consuming_sets_of_files" data-text="Consuming sets of files" tabindex="-1">Consuming sets of files</h3> <p>There are many datasets distributed as a set of files, where each file is an example.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">flowers_root</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-s1">'flower_photos'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">untar</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">flowers_root</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">pathlib</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Path</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">flowers_root</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><aside class="note"><strong>Note:</strong><span> these images are licensed CC-BY, see LICENSE.txt for details.</span></aside> <p>The root directory contains a directory for each class:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded 
syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">item</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">flowers_root</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">glob</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"*"</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">item</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">name</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> daisy tulips sunflowers LICENSE.txt dandelion roses </pre></devsite-code> <p>The files in each class directory are examples:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">list_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">list_files</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">str</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">flowers_root</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-s1">'*/*'</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">f</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">list_ds</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">f</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b'/home/kbuilder/.keras/datasets/flower_photos/tulips/4955884820_7e4ce4d7e5_m.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/dandelion/6250363717_17732e992e_n.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/tulips/14278331403_4c475f9a9b.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/dandelion/480621885_4c8b50fa11_m.jpg' b'/home/kbuilder/.keras/datasets/flower_photos/tulips/5716293002_a8be6a6dd3_n.jpg' </pre></devsite-code> <p>Read the data using the <a href="https://www.tensorflow.org/api_docs/python/tf/io/read_file"><code translate="no" dir="ltr">tf.io.read_file</code></a> function and extract the label from the path, returning <code translate="no" dir="ltr">(image, label)</code> pairs:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">process_path</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_path</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">strings</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">split</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-n">file_path</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">os</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">sep</span><span class="devsite-syntax-p">)[</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">read_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_path</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-n">labeled_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">list_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">process_path</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">image_raw</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_text</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">labeled_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">repr</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image_raw</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()[:</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">]))</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label_text</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x03\x02\x02\x03\x02\x02\x03\x03\x03\x03\x04\x03\x03\x04\x05\x08\x05\x05\x04\x04\x05\n\x07\x07\x06\x08\x0c\n\x0c\x0c\x0b\n\x0b\x0b\r\x0e\x12\x10\r\x0e\x11\x0e\x0b\x0b\x10\x16\x10\x11\x13\x14\x15\x15\x15\x0c\x0f\x17\x18\x16\x14\x18\x12\x14\x15\x14\xff\xdb\x00C\x01\x03\x04\x04\x05\x04\x05' b'dandelion' </pre></devsite-code> <!-- TODO(mrry): Add this section. ### Handling text data with unusual sizes --> <h2 id="batching_dataset_elements" data-text="Batching dataset elements" tabindex="-1">Batching dataset elements</h2> <h3 id="simple_batching" data-text="Simple batching" tabindex="-1">Simple batching</h3> <p>The simplest form of batching stacks <code translate="no" dir="ltr">n</code> consecutive elements of a dataset into a single element. 
The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch()</code></a> transformation does exactly this, with the same constraints as the <a href="https://www.tensorflow.org/api_docs/python/tf/stack"><code translate="no" dir="ltr">tf.stack()</code></a> operator, applied to each component of the elements: i.e., for each component <em>i</em>, all elements must have a tensor of the exact same shape.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">inc_dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dec_dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">inc_dataset</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">dec_dataset</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">batched_dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">batched_dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-n">arr</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">arr</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">])</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [array([0, 1, 2, 3]), array([ 0, -1, -2, -3])] [array([4, 5, 6, 7]), array([-4, -5, -6, -7])] [array([ 8, 9, 10, 11]), array([ -8, -9, -10, -11])] [array([12, 13, 14, 15]), array([-12, -13, -14, -15])] </pre></devsite-code> 
<p>While <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> tries to propagate shape information, the default settings of <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> result in an unknown batch size because the last batch may not be full. Note the <code translate="no" dir="ltr">None</code>s in the shape:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">batched_dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_BatchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))> </pre></devsite-code> <p>Use the <code translate="no" dir="ltr">drop_remainder</code> argument to ignore that last batch, and get full shape propagation:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">batched_dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">7</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">batched_dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_BatchDataset element_spec=(TensorSpec(shape=(7,), dtype=tf.int64, 
name=None), TensorSpec(shape=(7,), dtype=tf.int64, name=None))> </pre></devsite-code> <h3 id="batching_tensors_with_padding" data-text="Batching tensors with padding" tabindex="-1">Batching tensors with padding</h3> <p>The above recipe works for tensors that all have the same size. However, many models (including sequence models) work with input data that can have varying size (for example, sequences of different lengths). To handle this case, the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a> transformation enables you to batch tensors of different shapes by specifying one or more dimensions in which they may be padded.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">x</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">fill</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">cast</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">x</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)],</span> <span class="devsite-syntax-n">x</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">padded_batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">4</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">padded_shapes</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-kc">None</span><span class="devsite-syntax-p">,))</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">()</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [[0 0 0] [1 0 0] [2 2 0] [3 3 3]] [[4 4 4 4 0 0 0] [5 5 5 5 5 0 0] [6 6 6 6 6 6 0] [7 7 7 7 7 7 7]] </pre></devsite-code> <p>The <a 
href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#padded_batch"><code translate="no" dir="ltr">Dataset.padded_batch</code></a> transformation allows you to set different padding for each dimension of each component, and it may be variable-length (signified by <code translate="no" dir="ltr">None</code> in the example above) or constant-length. It is also possible to override the padding value, which defaults to 0.</p> <!-- TODO(mrry): Add this section. ### Dense ragged -> tf.SparseTensor --> <h2 id="training_workflows" data-text="Training workflows" tabindex="-1">Training workflows</h2> <h3 id="processing_multiple_epochs" data-text="Processing multiple epochs" tabindex="-1">Processing multiple epochs</h3> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> API offers two main ways to process multiple epochs of the same data.</p> <p>The simplest way to iterate over a dataset in multiple epochs is to use the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat()</code></a> transformation. 
First, create a dataset of titanic data:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_file</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"train.csv"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">"https://storage.googleapis.com/tf-datasets/titanic/train.csv"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">titanic_lines</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TextLineDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">plot_batch_sizes</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">ds</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">batch_sizes</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">[</span><span 
class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">bar</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">len</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch_sizes</span><span class="devsite-syntax-p">)),</span> <span class="devsite-syntax-n">batch_sizes</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">xlabel</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'Batch number'</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">ylabel</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'Batch size'</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p>Applying the <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat()</code></a> transformation with no arguments will repeat the input indefinitely.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> transformation concatenates its arguments without signaling the end of one epoch and the beginning of the next epoch. 
Because of this, a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> applied after <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> will yield batches that straddle epoch boundaries:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_batches</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">titanic_lines</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">128</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plot_batch_sizes</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_batches</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p><img src="/static/guide/data_files/output_nZ0G1cztoSHX_0.png" alt="Bar chart of batch size against batch number for the dataset repeated three times and then batched"></p> <p>If you need clear epoch separation, put <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> before the repeat:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">titanic_batches</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">titanic_lines</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">128</span><span 
class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plot_batch_sizes</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_batches</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p><img src="/static/guide/data_files/output_wmbmdK1qoSHS_0.png" alt="Bar chart of batch size against batch number for the dataset batched and then repeated three times"></p> <p>If you would like to perform a custom computation (for example, to collect statistics) at the end of each epoch, then it's simplest to restart the dataset iteration on each epoch:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">epochs</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">3</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">titanic_lines</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">128</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">epoch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-nb">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">epochs</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"End of epoch: "</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">epoch</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> (128,) (128,) (128,) (128,) (116,) End of epoch: 0 (128,) (128,) (128,) (128,) (116,) End of epoch: 1 (128,) (128,) (128,) (128,) (116,) End of epoch: 2 </pre></devsite-code> <h3 id="randomly_shuffling_input_data" data-text="Randomly shuffling input data" tabindex="-1">Randomly shuffling input data</h3> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" dir="ltr">Dataset.shuffle()</code></a> transformation maintains a fixed-size buffer and chooses the next element uniformly at random from that buffer.</p> <aside class="note"><strong>Note:</strong><span> While a large <code translate="no" dir="ltr">buffer_size</code> shuffles more thoroughly, it can take a lot of memory and significant time to fill. 
Consider using <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#interleave"><code translate="no" dir="ltr">Dataset.interleave</code></a> across files if this becomes a problem.</span></aside> <p>Add an index to the dataset so you can see the effect:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">lines</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TextLineDataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">titanic_file</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">counter</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Counter</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">counter</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">lines</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shuffle</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">buffer_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> WARNING:tensorflow:From /tmpfs/tmp/ipykernel_44933/4092668703.py:2: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.data.Dataset.counter(...)` instead. 
&lt;_BatchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.string, name=None))&gt; </pre></devsite-code> <p>Since the <code translate="no" dir="ltr">buffer_size</code> is 100, and the batch size is 20, the first batch contains no elements with an index over 120.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">n</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-n">line_batch</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">n</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [ 99 18 1 29 66 88 47 30 80 46 68 44 35 40 33 95 108 105 38 113] </pre></devsite-code> <p>As with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a>, the order relative to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a> matters.</p> <p><a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" dir="ltr">Dataset.shuffle</code></a> doesn't signal the end of an epoch until the shuffle buffer is empty. 
So a shuffle placed before a repeat will show every element of one epoch before moving to the next:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">counter</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">lines</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">shuffled</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shuffle</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">buffer_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Here are the item ID's near the epoch boundary:</span><span class="devsite-syntax-se">\n</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span 
class="devsite-syntax-n">n</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">line_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">shuffled</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">skip</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">60</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">n</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Here are the item ID's near the epoch boundary: [469 414 497 584 615 612 625 627 603 621] [582 553 343 602 626 567 486 593 616 525] [557 576 478 533 591 398 484 431] [66 43 51 18 94 3 76 52 90 57] [ 0 101 71 86 56 17 33 70 110 75] </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">shuffle_repeat</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">n</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">mean</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">n</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">line_batch</span> <span 
class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">shuffled</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">plot</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">shuffle_repeat</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"shuffle().repeat()"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">ylabel</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Mean item ID"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">legend</span><span class="devsite-syntax-p">()</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> &lt;matplotlib.legend.Legend at 0x7f373c471af0&gt; </pre></devsite-code> <p><img src="/static/guide/data_files/output_H9hlE-lGoSGz_1.png" alt="Plot of the mean item ID per batch for shuffle().repeat()"></p> <p>But a repeat before a shuffle mixes the epoch boundaries together:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">counter</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">lines</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">shuffled</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shuffle</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">buffer_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">100</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Here are the item ID's near the epoch boundary:</span><span class="devsite-syntax-se">\n</span><span class="devsite-syntax-s2">"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">n</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">line_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">shuffled</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">skip</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">55</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">15</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">n</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Here are the item ID's near the epoch boundary: [583 415 1 542 563 9 620 622 551 548] [589 592 365 571 33 557 618 31 541 27] [537 24 615 43 18 550 11 8 39 369] [601 38 485 20 627 46 22 23 322 608] [626 590 491 63 29 564 17 19 617 66] [508 580 72 45 57 54 556 62 14 511] [623 73 75 79 599 372 21 83 547 26] [486 4 0 573 74 49 0 53 95 34] [ 60 605 15 90 99 549 16 50 91 80] [106 108 112 297 561 44 52 82 86 71] [581 77 117 28 567 10 30 3 81 89] [587 32 102 7 135 51 113 110 114 451] [ 59 64 68 116 76 306 367 128 552 136] [111 569 522 5 67 616 154 131 512 37] [539 103 142 78 85 2 87 12 149 137] </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">repeat_shuffle</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">n</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">mean</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">n</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">line_batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">shuffled</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">plot</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">shuffle_repeat</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">label</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"shuffle().repeat()"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">plot</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">repeat_shuffle</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"repeat().shuffle()"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">ylabel</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Mean item ID"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">legend</span><span class="devsite-syntax-p">()</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> &lt;matplotlib.legend.Legend at 0x7f373c462b20&gt; </pre></devsite-code> <p><img src="/static/guide/data_files/output_VAM4cbpZoSGL_1.png" alt="Plot of the mean item ID per batch for shuffle().repeat() and repeat().shuffle()"></p> <h2 id="preprocessing_data" data-text="Preprocessing data" tabindex="-1">Preprocessing data</h2> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map(f)</code></a> transformation produces a new dataset by applying a given function <code translate="no" dir="ltr">f</code> to each element of the input dataset. It is based on the <a href="https://en.wikipedia.org/wiki/Map_(higher-order_function)"><code translate="no" dir="ltr">map()</code></a> function that is commonly applied to lists (and other structures) in functional programming languages. 
The function <code translate="no" dir="ltr">f</code> takes the <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects that represent a single element in the input, and returns the <a href="https://www.tensorflow.org/api_docs/python/tf/Tensor"><code translate="no" dir="ltr">tf.Tensor</code></a> objects that will represent a single element in the new dataset. Its implementation uses standard TensorFlow operations to transform one element into another.</p> <p>This section covers common examples of how to use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map()</code></a>.</p> <h3 id="decoding_image_data_and_resizing_it" data-text="Decoding image data and resizing it" tabindex="-1">Decoding image data and resizing it</h3> <!-- TODO(markdaoust): link to image augmentation when it exists --> <p>When training a neural network on real-world image data, it is often necessary to convert images of different sizes to a common size, so that they may be batched into a fixed size.</p> <p>Rebuild the flower filenames dataset:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">list_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">list_files</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">str</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">flowers_root</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-s1">'*/*'</span><span class="devsite-syntax-p">))</span> 
</code></pre></devsite-code> <p>Write a function that manipulates the dataset elements.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-c1"># Reads an image from a file, decodes it into a dense tensor, and resizes it</span> <span class="devsite-syntax-c1"># to a fixed shape.</span> <span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">parse_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">filename</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">parts</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">strings</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">split</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">filename</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">os</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">sep</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">parts</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">read_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">filename</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">image</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">decode_jpeg</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">convert_image_dtype</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">resize</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">128</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">128</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> </code></pre></devsite-code> <p>Test that it works.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">file_path</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">list_ds</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">parse_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">file_path</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">show</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">figure</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">imshow</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">title</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">decode</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'utf-8'</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">axis</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'off'</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">show</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p><img src="/static/guide/data_files/output_y8xuN_HBzGup_0.png" alt="png"></p> <p>Map it over the dataset.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">images_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">list_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">parse_image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">images_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">show</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p><img src="/static/guide/data_files/output_SzO8LI_H5Sk__0.png" alt="png"></p> <p><img src="/static/guide/data_files/output_SzO8LI_H5Sk__1.png" alt="png"></p> <h3 id="applying_arbitrary_python_logic" data-text="Applying arbitrary 
Python logic" tabindex="-1">Applying arbitrary Python logic</h3> <p>For performance reasons, use TensorFlow operations for preprocessing your data whenever possible. However, it is sometimes useful to call external Python libraries when parsing your input data. To do so, you can use the <a href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a> operation in a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a> transformation.</p> <p>For example, if you want to apply a random rotation, the <a href="https://www.tensorflow.org/api_docs/python/tf/image"><code translate="no" dir="ltr">tf.image</code></a> module only has <a href="https://www.tensorflow.org/api_docs/python/tf/image/rot90"><code translate="no" dir="ltr">tf.image.rot90</code></a>, which rotates only in 90-degree steps and so is not very useful for image augmentation.</p> <aside class="note"><strong>Note:</strong><span> <code translate="no" dir="ltr">tensorflow_addons</code> has a TensorFlow-compatible <code translate="no" dir="ltr">rotate</code> in <code translate="no" dir="ltr">tensorflow_addons.image.rotate</code>.</span></aside> <p>To demonstrate <a href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a>, try using the <code translate="no" dir="ltr">scipy.ndimage.rotate</code> function instead:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-kn">import</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">scipy.ndimage</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-k">as</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nn">ndimage</span> <span class="devsite-syntax-nd">@tf</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">py_function</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">Tout</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">random_rotate_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">ndimage</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">rotate</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">random</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">uniform</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">30</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mi">30</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">reshape</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">False</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">image</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-o">=</span> <span 
class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">images_ds</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">random_rotate_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">show</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.07214577..1.0803627]. 
</pre></devsite-code> <p><img src="/static/guide/data_files/output__wEyL7bS9S6t_1.png" alt="png"></p> <p>To use this function with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map"><code translate="no" dir="ltr">Dataset.map</code></a>, the same caveats apply as with <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator"><code translate="no" dir="ltr">Dataset.from_generator</code></a>; you need to describe the return shapes and types when you apply the function:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">tf_random_rotate_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">im_shape</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span> <span class="devsite-syntax-n">image</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">random_rotate_image</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">set_shape</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">im_shape</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> </code></pre></devsite-code><div></div><devsite-code><pre 
class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">rot_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">images_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf_random_rotate_image</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">rot_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">show</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.014158356..1.0156134]. Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-0.067302234..1.1018459]. 
</pre></devsite-code> <p><img src="/static/guide/data_files/output_bWPqKbTnbDct_1.png" alt="png"></p> <p><img src="/static/guide/data_files/output_bWPqKbTnbDct_2.png" alt="png"></p> <h3 id="parsing_tfexample_protocol_buffer_messages" data-text="Parsing tf.Example protocol buffer messages" tabindex="-1">Parsing <code translate="no" dir="ltr">tf.Example</code> protocol buffer messages</h3> <p>Many input pipelines extract <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> protocol buffer messages from files in the TFRecord format. Each <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> record contains one or more "features", and the input pipeline typically converts these features into tensors.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">fsns_test_file</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"fsns.tfrec"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s2">"https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001"</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">dataset</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">TFRecordDataset</span><span 
class="devsite-syntax-p">(</span><span class="devsite-syntax-n">filenames</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">fsns_test_file</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">dataset</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)> </pre></devsite-code> <p>You can work with <a href="https://www.tensorflow.org/api_docs/python/tf/train/Example"><code translate="no" dir="ltr">tf.train.Example</code></a> protos outside of a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> to understand the data:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">raw_example</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">parsed</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">train</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">FromString</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">raw_example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-n">feature</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">parsed</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">feature</span> <span class="devsite-syntax-n">raw_img</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">feature</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'image/encoded'</span><span class="devsite-syntax-p">]</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">bytes_list</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-n">img</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">image</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">decode_png</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">raw_img</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">imshow</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">img</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">axis</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'off'</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">_</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">plt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">title</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">feature</span><span 
class="devsite-syntax-p">[</span><span class="devsite-syntax-s2">"image/text"</span><span class="devsite-syntax-p">]</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">bytes_list</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">value</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">])</span> </code></pre></devsite-code> <p><img src="/static/guide/data_files/output_4znsVNqnF73C_0.png" alt="png"></p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">raw_example</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">tf_parse</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">eg</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">example</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">parse_example</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">eg</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">newaxis</span><span 
class="devsite-syntax-p">],</span> <span class="devsite-syntax-p">{</span> <span class="devsite-syntax-s1">'image/encoded'</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">FixedLenFeature</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-s1">'image/text'</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">io</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">FixedLenFeature</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">dtype</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">string</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-p">})</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">example</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'image/encoded'</span><span class="devsite-syntax-p">][</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">example</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'image/text'</span><span 
class="devsite-syntax-p">][</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">]</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">img</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">txt</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf_parse</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">raw_example</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">txt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">repr</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">img</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()[:</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">]),</span> <span class="devsite-syntax-s2">"..."</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> b'Rue Perreyon' b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X' ... 
</pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">decoded</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">tf_parse</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">decoded</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_MapDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.string, name=None))> </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">image_batch</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">text_batch</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">decoded</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)))</span> <span class="devsite-syntax-n">image_batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> TensorShape([10]) </pre></devsite-code> <h3 id="time_series_windowing" data-text="Time series windowing" 
tabindex="-1">Time series windowing</h3> <p>For an end-to-end time series example see: <a href="https://www.tensorflow.org/tutorials/structured_data/time_series">Time series forecasting</a>.</p> <p>Time series data is often organized with the time axis intact.</p> <p>Use a simple <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#range"><code translate="no" dir="ltr">Dataset.range</code></a> to demonstrate:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">range_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">100000</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p>Typically, models based on this sort of data will want a contiguous time slice.</p> <p>The simplest approach would be to batch the data:</p> <h4 id="using_batch" data-text="Using batch" tabindex="-1">Using <code translate="no" dir="ltr">batch</code></h4> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">batches</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span 
class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">batches</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18 19] [20 21 22 23 24 25 26 27 28 29] [30 31 32 33 34 35 36 37 38 39] [40 41 42 43 44 45 46 47 48 49] </pre></devsite-code> <p>Or to make dense predictions one step into the future, you might shift the features and labels by one step relative to each other:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">dense_1_step</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-c1"># Shift features and labels one step relative to each other.</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">[:</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mi">1</span><span 
class="devsite-syntax-p">:]</span> <span class="devsite-syntax-n">predict_dense_1_step</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">batches</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dense_1_step</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">predict_dense_1_step</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-s2">" => "</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4 5 6 7 8] => [1 2 3 4 5 6 7 8 9] [10 11 12 13 14 15 16 17 18] => [11 12 13 14 15 16 17 18 19] [20 21 22 23 24 25 26 27 28] => [21 22 23 24 25 26 27 28 29] </pre></devsite-code> <p>To predict a whole window instead of a fixed offset you can split the batches into two parts:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">batches</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">15</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">label_next_5_steps</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">[:</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-c1"># Inputs: All except the last 5 steps</span> <span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-o">-</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">:])</span> <span class="devsite-syntax-c1"># Labels: The last 5 steps</span> <span class="devsite-syntax-n">predict_5_steps</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">batches</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label_next_5_steps</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">predict_5_steps</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-s2">" => "</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4 5 6 7 8 9] => [10 11 12 13 14] [15 16 17 18 19 20 21 22 23 24] => [25 26 27 28 29] [30 31 32 33 34 35 36 37 38 39] => [40 41 42 43 44] </pre></devsite-code> <p>To allow some overlap between the features of one batch and the labels of another, use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#zip"><code translate="no" dir="ltr">Dataset.zip</code></a>:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">feature_length</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">10</span> <span class="devsite-syntax-n">label_length</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">3</span> <span class="devsite-syntax-n">features</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">feature_length</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">feature_length</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">skip</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-p">[:</span><span class="devsite-syntax-n">label_length</span><span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">predicted_steps</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">zip</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">predicted_steps</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-s2">" => "</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4 5 6 7 8 9] => [10 11 12] [10 11 12 13 14 15 16 17 18 19] => [20 21 22] [20 21 22 23 24 25 26 27 28 29] => [30 31 32] [30 31 32 33 34 35 36 37 38 39] => [40 41 42] [40 41 42 43 44 45 46 47 48 49] => [50 51 52] </pre></devsite-code> <h4 id="using_window" data-text="Using window" tabindex="-1">Using <code translate="no" dir="ltr">window</code></h4> <p>While using <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> works, there are situations where you may need finer control. The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window"><code translate="no" dir="ltr">Dataset.window</code></a> method gives you complete control, but requires some care: it returns a <code translate="no" dir="ltr">Dataset</code> of <code translate="no" dir="ltr">Datasets</code>. 
Go to the <a href="#dataset_structure">Dataset structure</a> section for details.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">window_size</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">5</span> <span class="devsite-syntax-n">windows</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">window</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">shift</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">sub_ds</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">windows</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sub_ds</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> <_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)> <_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)> <_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)> <_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)> <_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)> </pre></devsite-code> <p>The <a 
href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#flat_map"><code translate="no" dir="ltr">Dataset.flat_map</code></a> method can take a dataset of datasets and flatten it into a single dataset:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">x</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">windows</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">flat_map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">x</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">x</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">30</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">x</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">end</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s1">' '</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 0 1 2 3 4 1 2 3 4 5 2 3 4 5 6 3 4 5 6 7 4 5 6 7 8 5 6 7 8 9 </pre></devsite-code> <p>In nearly all cases, you will want to <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch"><code translate="no" dir="ltr">Dataset.batch</code></a> the dataset first:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code 
translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">sub_to_batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sub</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">sub</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">example</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">windows</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">flat_map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sub_to_batch</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0 1 2 3 4] [1 2 3 4 5] [2 3 4 5 6] [3 4 5 6 7] [4 5 6 7 8] </pre></devsite-code> <p>Now, you can see that the <code translate="no" dir="ltr">shift</code> argument controls how much each window moves over.</p> <p>Putting this together you might write this function:</p> 
<div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">make_window_dataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">shift</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">stride</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">windows</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">window</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">shift</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">shift</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">stride</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">stride</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">sub_to_batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sub</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">sub</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span 
class="devsite-syntax-p">(</span><span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">drop_remainder</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">windows</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">windows</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">flat_map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">sub_to_batch</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">windows</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">make_window_dataset</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">window_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">shift</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">stride</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">example</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span 
class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">example</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [ 0 3 6 9 12 15 18 21 24 27] [ 5 8 11 14 17 20 23 26 29 32] [10 13 16 19 22 25 28 31 34 37] [15 18 21 24 27 30 33 36 39 42] [20 23 26 29 32 35 38 41 44 47] [25 28 31 34 37 40 43 46 49 52] [30 33 36 39 42 45 48 51 54 57] [35 38 41 44 47 50 53 56 59 62] [40 43 46 49 52 55 58 61 64 67] [45 48 51 54 57 60 63 66 69 72] </pre></devsite-code> <p>Then it's easy to extract labels, as before:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">dense_labels_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">dense_1_step</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">inputs</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">dense_labels_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">inputs</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span 
class="devsite-syntax-p">(),</span> <span class="devsite-syntax-s2">"=>"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [ 0 3 6 9 12 15 18 21 24] => [ 3 6 9 12 15 18 21 24 27] [ 5 8 11 14 17 20 23 26 29] => [ 8 11 14 17 20 23 26 29 32] [10 13 16 19 22 25 28 31 34] => [13 16 19 22 25 28 31 34 37] </pre></devsite-code> <h3 id="resampling" data-text="Resampling" tabindex="-1">Resampling</h3> <p>When working with a dataset that is very class-imbalanced, you may want to resample the dataset. <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> provides two methods to do this. The credit card fraud dataset is a good example of this sort of problem.</p> <aside class="note"><strong>Note:</strong><span> Go to <a href="https://www.tensorflow.org/tutorials/structured_data/imbalanced_data">Classification on imbalanced data</a> for a full tutorial.</span></aside><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">zip_path</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">utils</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">get_file</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">origin</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s1">'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.zip'</span><span 
class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">fname</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s1">'creditcard.zip'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">extract</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">csv_path</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">zip_path</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">replace</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s1">'.zip'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'.csv'</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/creditcard.zip 69155632/69155632 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step </pre></devsite-code> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">creditcard_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">experimental</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">make_csv_dataset</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">csv_path</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">batch_size</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">1024</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label_name</span><span 
class="devsite-syntax-o">=</span><span class="devsite-syntax-s2">"Class"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-c1"># Set the column types: 30 floats and an int.</span> <span class="devsite-syntax-n">column_defaults</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-nb">float</span><span class="devsite-syntax-p">()]</span><span class="devsite-syntax-o">*</span><span class="devsite-syntax-mi">30</span><span class="devsite-syntax-o">+</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-nb">int</span><span class="devsite-syntax-p">()])</span> </code></pre></devsite-code> <p>Now, check the distribution of classes; it is highly skewed:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">count</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">counts</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">batch</span> <span class="devsite-syntax-n">class_1</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">==</span> <span class="devsite-syntax-mi">1</span> <span class="devsite-syntax-n">class_1</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">cast</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">class_1</span><span class="devsite-syntax-p">,</span> <span 
class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">class_0</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">==</span> <span class="devsite-syntax-mi">0</span> <span class="devsite-syntax-n">class_0</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">cast</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">class_0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">counts</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'class_0'</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-o">+=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">reduce_sum</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">class_0</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">counts</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'class_1'</span><span class="devsite-syntax-p">]</span> <span class="devsite-syntax-o">+=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">reduce_sum</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">class_1</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">counts</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" 
is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">counts</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">creditcard_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">reduce</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">initial_state</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">{</span><span class="devsite-syntax-s1">'class_0'</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'class_1'</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">},</span> <span class="devsite-syntax-n">reduce_func</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">count</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">counts</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">array</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-n">counts</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'class_0'</span><span class="devsite-syntax-p">]</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">counts</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'class_1'</span><span class="devsite-syntax-p">]</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()])</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">astype</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">float32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">fractions</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">counts</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-n">counts</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">sum</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">fractions</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0.996 0.004] </pre></devsite-code> <p>A common approach to training with an imbalanced dataset is to balance it. <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> includes a few methods which enable this workflow:</p> <h4 id="datasets_sampling" data-text="Datasets sampling" tabindex="-1">Datasets sampling</h4> <p>One approach to resampling a dataset is to use <code translate="no" dir="ltr">sample_from_datasets</code>. 
This is more applicable when you have a separate <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> for each class.</p> <p>Here, just use filter to generate them from the credit card fraud data:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">negative_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">creditcard_ds</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">unbatch</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">filter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">==</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">())</span> <span class="devsite-syntax-n">positive_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">creditcard_ds</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">unbatch</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">filter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">:</span> <span 
class="devsite-syntax-n">label</span><span class="devsite-syntax-o">==</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">positive_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">1</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">label</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [1 1 1 1 1 1 1 1 1 1] </pre></devsite-code> <p>To use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#sample_from_datasets"><code translate="no" dir="ltr">tf.data.Dataset.sample_from_datasets</code></a>, pass the datasets and the weight for each:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">balanced_ds</span> <span 
class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">sample_from_datasets</span><span class="devsite-syntax-p">(</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-n">negative_ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">positive_ds</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-p">[</span><span class="devsite-syntax-mf">0.5</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-mf">0.5</span><span class="devsite-syntax-p">])</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p>Now the dataset produces examples of each class with a 50/50 probability:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">balanced_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> 
<div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [1 0 1 0 0 1 0 1 1 0] [1 0 0 0 0 0 0 0 1 1] [0 0 1 0 0 1 0 0 1 0] [0 1 1 0 1 0 0 1 1 0] [0 1 1 0 0 0 1 1 1 1] [1 1 1 1 1 1 0 0 0 0] [0 1 1 0 1 0 0 1 1 1] [1 1 0 0 0 0 0 1 0 1] [1 1 0 1 1 1 1 0 0 1] [0 1 1 0 0 1 0 0 0 0] </pre></devsite-code> <h4 id="rejection_resampling" data-text="Rejection resampling" tabindex="-1">Rejection resampling</h4> <p>One problem with the above <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#sample_from_datasets"><code translate="no" dir="ltr">Dataset.sample_from_datasets</code></a> approach is that it needs a separate <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code translate="no" dir="ltr">tf.data.Dataset</code></a> per class. You could use <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#filter"><code translate="no" dir="ltr">Dataset.filter</code></a> to create those two datasets, but that results in all the data being loaded twice.</p> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#rejection_resample"><code translate="no" dir="ltr">tf.data.Dataset.rejection_resample</code></a> method can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped or repeated to achieve balance.</p> <p>The <code translate="no" dir="ltr">rejection_resample</code> method takes a <code translate="no" dir="ltr">class_func</code> argument. This <code translate="no" dir="ltr">class_func</code> is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.</p> <p>The goal here is to balance the label distribution, and the elements of <code translate="no" dir="ltr">creditcard_ds</code> are already <code translate="no" dir="ltr">(features, label)</code> pairs. 
So the <code translate="no" dir="ltr">class_func</code> just needs to return those labels:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">def</span><span class="devsite-syntax-w"> </span><span class="devsite-syntax-nf">class_func</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">label</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-k">return</span> <span class="devsite-syntax-n">label</span> </code></pre></devsite-code> <p>The resampling method deals with individual examples, so in this case you must <code translate="no" dir="ltr">unbatch</code> the dataset before applying that method.</p> <p>The method needs a target distribution, and optionally an initial distribution estimate as inputs.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">resample_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-p">(</span> <span class="devsite-syntax-n">creditcard_ds</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">unbatch</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">rejection_resample</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">class_func</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">target_dist</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-mf">0.5</span><span class="devsite-syntax-p">,</span><span class="devsite-syntax-mf">0.5</span><span class="devsite-syntax-p">],</span> <span class="devsite-syntax-n">initial_dist</span><span 
class="devsite-syntax-o">=</span><span class="devsite-syntax-n">fractions</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">))</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py:4968: Print (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2018-08-20. Instructions for updating: Use tf.print instead of tf.Print. Note that tf.print returns a no-output operator that directly prints the output. Outside of defuns or eager mode, this operator will not be executed unless it is directly specified in session.run or used as a control dependency for other operators. This is only a concern in graph mode. Below is an example of how to ensure tf.print executes in graph mode: </pre></devsite-code> <p>The <code translate="no" dir="ltr">rejection_resample</code> method returns <code translate="no" dir="ltr">(class, example)</code> pairs where the <code translate="no" dir="ltr">class</code> is the output of the <code translate="no" dir="ltr">class_func</code>. 
In this case, the <code translate="no" dir="ltr">example</code> was already a <code translate="no" dir="ltr">(feature, label)</code> pair, so use <code translate="no" dir="ltr">map</code> to drop the extra copy of the labels:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">balanced_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">resample_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">map</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-k">lambda</span> <span class="devsite-syntax-n">extra_label</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">features_and_label</span><span class="devsite-syntax-p">:</span> <span class="devsite-syntax-n">features_and_label</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <p>Now the dataset produces examples of each class with a 50/50 probability:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">features</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-n">balanced_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">take</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">):</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">())</span> </code></pre></devsite-code> <div></div><devsite-code><pre 
class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] Proportion of examples rejected by sampler is high: [0.995996118][0.995996118 0.00400390616][0 1] [1 0 1 0 1 0 1 0 1 1] [1 0 1 1 1 1 0 0 1 0] [1 0 1 1 0 1 0 0 0 1] [0 1 0 0 0 0 1 1 1 1] [1 0 0 0 1 1 1 0 1 0] [0 0 0 1 0 0 1 0 1 1] [0 1 0 0 0 0 1 0 1 0] [1 0 0 0 0 1 0 0 0 1] [0 0 0 0 1 1 1 1 1 0] [1 1 0 1 1 1 1 1 1 0] </pre></devsite-code> <h2 id="iterator_checkpointing" data-text="Iterator Checkpointing" tabindex="-1">Iterator Checkpointing</h2> <p>TensorFlow supports <a href="https://www.tensorflow.org/guide/checkpoint">taking checkpoints</a> so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. 
Note however that iterator checkpoints may be large, since transformations such as <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle"><code translate="no" dir="ltr">Dataset.shuffle</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch"><code translate="no" dir="ltr">Dataset.prefetch</code></a> require buffering elements within the iterator.</p> <p>To include your iterator in a checkpoint, pass the iterator to the <a href="https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint"><code translate="no" dir="ltr">tf.train.Checkpoint</code></a> constructor.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">range_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">iterator</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-nb">iter</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">range_ds</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">ckpt</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">train</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Checkpoint</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">step</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span 
class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Variable</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">0</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">iterator</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">iterator</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">manager</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">train</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">CheckpointManager</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">ckpt</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-s1">'/tmp/my_ckpt'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">max_to_keep</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">3</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">iterator</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">_</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-nb">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">)])</span> <span class="devsite-syntax-n">save_path</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">manager</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">save</span><span class="devsite-syntax-p">()</span> <span 
class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">iterator</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">_</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-nb">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">)])</span> <span class="devsite-syntax-n">ckpt</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">restore</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">manager</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">latest_checkpoint</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">([</span><span class="devsite-syntax-nb">next</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">iterator</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">numpy</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-k">for</span> <span class="devsite-syntax-n">_</span> <span class="devsite-syntax-ow">in</span> <span class="devsite-syntax-nb">range</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5</span><span class="devsite-syntax-p">)])</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] [5, 6, 7, 8, 9] </pre></devsite-code> <aside class="note"><strong>Note:</strong><span> It is not possible to checkpoint an iterator which relies on an external state, 
such as a <a href="https://www.tensorflow.org/api_docs/python/tf/py_function"><code translate="no" dir="ltr">tf.py_function</code></a>. Attempting to do so will raise an exception complaining about the external state.</span></aside> <h2 id="using_tfdata_with_tfkeras" data-text="Using tf.data with tf.keras" tabindex="-1">Using <a href="https://www.tensorflow.org/api_docs/python/tf/data"><code translate="no" dir="ltr">tf.data</code></a> with <a href="https://www.tensorflow.org/api_docs/python/tf/keras"><code translate="no" dir="ltr">tf.keras</code></a></h2> <p>The <a href="https://www.tensorflow.org/api_docs/python/tf/keras"><code translate="no" dir="ltr">tf.keras</code></a> API simplifies many aspects of creating and executing machine learning models. Its <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit"><code translate="no" dir="ltr">Model.fit</code></a>, <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#evaluate"><code translate="no" dir="ltr">Model.evaluate</code></a>, and <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict"><code translate="no" dir="ltr">Model.predict</code></a> APIs support datasets as inputs. 
Here is a quick dataset and model setup:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">train</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">test</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">datasets</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">fashion_mnist</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">load_data</span><span class="devsite-syntax-p">()</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">train</span> <span class="devsite-syntax-n">images</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">images</span><span class="devsite-syntax-o">/</span><span class="devsite-syntax-mf">255.0</span> <span class="devsite-syntax-n">labels</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">astype</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">np</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">int32</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code><div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">fmnist_train_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span 
class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">((</span><span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">labels</span><span class="devsite-syntax-p">))</span> <span class="devsite-syntax-n">fmnist_train_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shuffle</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">5000</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">model</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Sequential</span><span class="devsite-syntax-p">([</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">layers</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Flatten</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">layers</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dense</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-p">])</span> <span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">compile</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">optimizer</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-s1">'adam'</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">loss</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">keras</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">losses</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">SparseCategoricalCrossentropy</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">from_logits</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-kc">True</span><span class="devsite-syntax-p">),</span> <span class="devsite-syntax-n">metrics</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-p">[</span><span class="devsite-syntax-s1">'accuracy'</span><span class="devsite-syntax-p">])</span> </code></pre></devsite-code> <p>Passing a dataset of <code translate="no" dir="ltr">(feature, label)</code> pairs is all that's needed for <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit"><code translate="no" dir="ltr">Model.fit</code></a> and <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#evaluate"><code translate="no" dir="ltr">Model.evaluate</code></a>:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">fit</span><span class="devsite-syntax-p">(</span><span 
class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">epochs</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Epoch 1/2 WARNING: All log messages before absl::InitializeLog() is called are written to STDERR I0000 00:00:1723685884.693688 45100 service.cc:146] XLA service 0x7f35cc006690 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: I0000 00:00:1723685884.693721 45100 service.cc:154] StreamExecutor device (0): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693725 45100 service.cc:154] StreamExecutor device (1): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693728 45100 service.cc:154] StreamExecutor device (2): Tesla T4, Compute Capability 7.5 I0000 00:00:1723685884.693731 45100 service.cc:154] StreamExecutor device (3): Tesla T4, Compute Capability 7.5 136/1875 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - accuracy: 0.5241 - loss: 1.4346 I0000 00:00:1723685885.241810 45100 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process. 
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - accuracy: 0.7449 - loss: 0.7643 Epoch 2/2 1875/1875 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - accuracy: 0.8381 - loss: 0.4704 <keras.src.callbacks.history.History at 0x7f373f583250> </pre></devsite-code> <p>If you pass an infinite dataset, for example by calling <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset#repeat"><code translate="no" dir="ltr">Dataset.repeat</code></a>, you just need to also pass the <code translate="no" dir="ltr">steps_per_epoch</code> argument:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">fit</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">epochs</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">2</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">steps_per_epoch</span><span class="devsite-syntax-o">=</span><span class="devsite-syntax-mi">20</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> Epoch 1/2 20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8254 - loss: 0.4682 Epoch 2/2 20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8622 - loss: 0.4263 <keras.src.callbacks.history.History at 0x7f37443e1190> </pre></devsite-code> <p>To evaluate on the entire dataset, pass it to <code translate="no" dir="ltr">Model.evaluate</code> without a step count:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">loss</span><span 
class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">accuracy</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">evaluate</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Loss :"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">loss</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Accuracy :"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">accuracy</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 1875/1875 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - accuracy: 0.8504 - loss: 0.4343 Loss : 0.4353208839893341 Accuracy : 0.849216639995575 </pre></devsite-code> <p>For long datasets, set the number of steps to evaluate:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">loss</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">accuracy</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">evaluate</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">repeat</span><span class="devsite-syntax-p">(),</span> <span class="devsite-syntax-n">steps</span><span class="devsite-syntax-o">=</span><span 
class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Loss :"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">loss</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-s2">"Accuracy :"</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">accuracy</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8411 - loss: 0.5209 Loss : 0.46679750084877014 Accuracy : 0.84375 </pre></devsite-code> <p>The labels are not required when calling <a href="https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict"><code translate="no" dir="ltr">Model.predict</code></a>.</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">predict_ds</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">tf</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">data</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">Dataset</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">from_tensor_slices</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">images</span><span class="devsite-syntax-p">)</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">batch</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-mi">32</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-n">result</span> <span class="devsite-syntax-o">=</span> <span 
class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">predict</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">predict_ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">steps</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">result</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step (320, 10) </pre></devsite-code> <p>But the labels are ignored if you do pass a dataset containing them:</p> <div></div><devsite-code><pre class="devsite-click-to-copy" translate="no" dir="ltr" is-upgraded syntax="Python"><code translate="no" dir="ltr"><span class="devsite-syntax-n">result</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-n">model</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">predict</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">fmnist_train_ds</span><span class="devsite-syntax-p">,</span> <span class="devsite-syntax-n">steps</span> <span class="devsite-syntax-o">=</span> <span class="devsite-syntax-mi">10</span><span class="devsite-syntax-p">)</span> <span class="devsite-syntax-nb">print</span><span class="devsite-syntax-p">(</span><span class="devsite-syntax-n">result</span><span class="devsite-syntax-o">.</span><span class="devsite-syntax-n">shape</span><span class="devsite-syntax-p">)</span> </code></pre></devsite-code> <div></div><devsite-code><pre class="tfo-notebook-code-cell-output" translate="no" dir="ltr" is-upgraded> 10/10 
━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step (320, 10) </pre></devsite-code> </div> <devsite-thumb-rating position="footer"> </devsite-thumb-rating> <div class="devsite-floating-action-buttons"> </div> </article> <devsite-content-footer class="nocontent"> <p>Except as otherwise noted, the content of this page is licensed under the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 License</a>, and code samples are licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>. For details, see the <a href="https://developers.google.com/site-policies">Google Developers Site Policies</a>. Java is a registered trademark of Oracle and/or its affiliates.</p> <p>Last updated 2024-08-15 UTC.</p> </devsite-content-footer> <devsite-notification > </devsite-notification> <div class="devsite-content-data"> <template class="devsite-content-data-template"> [[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Missing the information I need","missingTheInformationINeed","thumb-down"],["Too complicated / too many steps","tooComplicatedTooManySteps","thumb-down"],["Out of date","outOfDate","thumb-down"],["Samples / code issue","samplesCodeIssue","thumb-down"],["Other","otherDown","thumb-down"]],["Last updated 2024-08-15 UTC."],[],[]] </template> </div> </devsite-content> </main> <devsite-footer-promos class="devsite-footer"> </devsite-footer-promos> <devsite-footer-linkboxes class="devsite-footer"> <nav class="devsite-footer-linkboxes nocontent" aria-label="Footer links"> <ul class="devsite-footer-linkboxes-list"> <li class="devsite-footer-linkbox "> <h3 class="devsite-footer-linkbox-heading no-link">Stay connected</h3> <ul class="devsite-footer-linkbox-list"> <li class="devsite-footer-linkbox-item"> <a href="//blog.tensorflow.org" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" 
data-label="Footer Link (index 1)" > Blog </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//discuss.tensorflow.org" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 2)" > Forum </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 3)" > GitHub </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//twitter.com/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 4)" > Twitter </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//youtube.com/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 5)" > YouTube </a> </li> </ul> </li> <li class="devsite-footer-linkbox "> <h3 class="devsite-footer-linkbox-heading no-link">Support</h3> <ul class="devsite-footer-linkbox-list"> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/tensorflow/issues" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 1)" > Issue tracker </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//github.com/tensorflow/tensorflow/blob/master/RELEASE.md" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 2)" > Release notes </a> </li> <li class="devsite-footer-linkbox-item"> <a href="//stackoverflow.com/questions/tagged/tensorflow" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 3)" > Stack Overflow </a> </li> <li class="devsite-footer-linkbox-item"> <a href="/extras/tensorflow_brand_guidelines.pdf" 
class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 4)" > Brand guidelines </a> </li> <li class="devsite-footer-linkbox-item"> <a href="/about/bib" class="devsite-footer-linkbox-link gc-analytics-event" data-category="Site-Wide Custom Events" data-label="Footer Link (index 5)" > Cite TensorFlow </a> </li> </ul> </li> </ul> </nav> </devsite-footer-linkboxes> <devsite-footer-utility class="devsite-footer"> <div class="devsite-footer-utility nocontent"> <nav class="devsite-footer-utility-links" aria-label="Utility links"> <ul class="devsite-footer-utility-list"> <li class="devsite-footer-utility-item "> <a class="devsite-footer-utility-link gc-analytics-event" href="//policies.google.com/terms" data-category="Site-Wide Custom Events" data-label="Footer Terms link" > Terms </a> </li> <li class="devsite-footer-utility-item "> <a class="devsite-footer-utility-link gc-analytics-event" href="//policies.google.com/privacy" data-category="Site-Wide Custom Events" data-label="Footer Privacy link" > Privacy </a> </li> <li class="devsite-footer-utility-item glue-cookie-notification-bar-control"> <a class="devsite-footer-utility-link gc-analytics-event" href="#" data-category="Site-Wide Custom Events" data-label="Footer Manage cookies link" aria-hidden="true" > Manage cookies </a> </li> <li class="devsite-footer-utility-item devsite-footer-utility-button"> <span class="devsite-footer-utility-description">Sign up for the TensorFlow newsletter</span> <a class="devsite-footer-utility-link gc-analytics-event" href="//www.tensorflow.org/subscribe" data-category="Site-Wide Custom Events" data-label="Footer Subscribe link" > Subscribe </a> </li> </ul> <devsite-language-selector> <ul role="presentation"> <li role="presentation"> <a role="menuitem" lang="en" >English</a> </li> <li role="presentation"> <a role="menuitem" lang="es_419" >Español – América Latina</a> </li> <li role="presentation"> <a role="menuitem" 
lang="fr" >Français</a> </li>
          <li role="presentation"> <a role="menuitem" lang="id">Indonesia</a> </li>
          <li role="presentation"> <a role="menuitem" lang="it">Italiano</a> </li>
          <li role="presentation"> <a role="menuitem" lang="pl">Polski</a> </li>
          <li role="presentation"> <a role="menuitem" lang="pt_br">Português – Brasil</a> </li>
          <li role="presentation"> <a role="menuitem" lang="vi">Tiếng Việt</a> </li>
          <li role="presentation"> <a role="menuitem" lang="tr">Türkçe</a> </li>
          <li role="presentation"> <a role="menuitem" lang="ru">Русский</a> </li>
          <li role="presentation"> <a role="menuitem" lang="he">עברית</a> </li>
          <li role="presentation"> <a role="menuitem" lang="ar">العربيّة</a> </li>
          <li role="presentation"> <a role="menuitem" lang="fa">فارسی</a> </li>
          <li role="presentation"> <a role="menuitem" lang="hi">हिंदी</a> </li>
          <li role="presentation"> <a role="menuitem" lang="bn">বাংলা</a> </li>
          <li role="presentation"> <a role="menuitem" lang="th">ภาษาไทย</a> </li>
          <li role="presentation"> <a role="menuitem" lang="zh_cn">中文 – 简体</a> </li>
          <li role="presentation"> <a role="menuitem" lang="ja">日本語</a> </li>
          <li role="presentation"> <a role="menuitem" lang="ko">한국어</a> </li>
        </ul>
      </devsite-language-selector>
    </nav>
  </div>
</devsite-footer-utility>
<devsite-panel></devsite-panel>
</section></section>
<!-- Page-level custom elements; they carry no content in this markup. -->
<devsite-sitemask></devsite-sitemask>
<devsite-snackbar></devsite-snackbar>
<devsite-tooltip></devsite-tooltip>
<devsite-heading-link></devsite-heading-link>
<!-- Analytics settings are embedded as two application/json data blocks. -->
<devsite-analytics>
  <script type="application/json" analytics>[{"dimensions": {"dimension4": "TensorFlow Core", "dimension12": false, "dimension3": false, "dimension1": "Signed out", "dimension6": "en", "dimension5": "en"}, "gaid": "UA-69864048-1", "metrics": {"ratings_count": "metric2", "ratings_value": "metric1"}, "purpose": 0}]</script>
  <script type="application/json" tag-management>{"at": "True", "ga4": [], "ga4p": [], "gtm": [{"id": "GTM-MXSL34P", "purpose": 0}], "parameters": {"internalUser":
"False", "language": {"machineTranslated": "False", "requested": "en", "served": "en"}, "pageType": "article", "projectName": "TensorFlow Core", "signedIn": "False", "tenant": "tensorflow", "recommendations": {"sourcePage": "", "sourceType": 0, "sourceRank": 0, "sourceIdenticalDescriptions": 0, "sourceTitleWords": 0, "sourceDescriptionWords": 0, "experiment": ""}, "experiment": {"ids": ""}}}</script> </devsite-analytics> <devsite-badger></devsite-badger> <script nonce="LTyj35c/CtrUmJAlmmKAZyQcb/PDv2"> (function(d,e,v,s,i,t,E){d['GoogleDevelopersObject']=i; t=e.createElement(v);t.async=1;t.src=s;E=e.getElementsByTagName(v)[0]; E.parentNode.insertBefore(t,E);})(window, document, 'script', 'https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/js/app_loader.js', '[15,"en",null,"/js/devsite_app_module.js","https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f","https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow","https://tensorflow-dot-devsite-v2-prod-3p.appspot.com",null,null,["/_pwa/tensorflow/manifest.json","https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/images/video-placeholder.svg","https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/favicon.png","https://www.gstatic.com/devrel-devsite/prod/v38a693baeb774512feb42f10aac8f755d8791ed41119b5be7a531f8e16f8279f/tensorflow/images/lockup.svg","https://fonts.googleapis.com/css?family=Google+Sans:400,500|Roboto:400,400italic,500,500italic,700,700italic|Roboto+Mono:400,500,700&display=swap"],1,null,[1,6,8,12,14,17,21,25,50,52,63,70,75,76,80,87,91,92,93,97,98,100,101,102,103,104,105,107,108,109,110,112,113,116,117,118,120,122,124,125,126,127,129,130,131,132,133,134,135,136,138,140,141,147,148,149,151,
152,156,157,158,159,161,163,164,168,169,170,179,180,182,183,186,191,193,196],"AIzaSyCNm9YxQumEXwGJgTDjxoxXK6m1F-9720Q","AIzaSyCc76DZePGtoyUjqKrLdsMGk_ry7sljLbY","www.tensorflow.org","AIzaSyB9bqgQ2t11WJsOX8qNsCQ6U-w91mmqF-I","AIzaSyAdYnStPdzjcJJtQ0mvIaeaMKj7_t6J_Fg",null,null,null,["TpcFeatures__enable_mirror_tenant_redirects","Profiles__enable_profile_collections","Cloud__enable_cloudx_ping","Cloud__enable_cloud_shell","Search__enable_ai_eligibility_checks","TpcFeatures__enable_unmirrored_page_left_nav","MiscFeatureFlags__developers_footer_image","Profiles__enable_release_notes_notifications","Profiles__enable_developer_profiles_callout","Search__enable_suggestions_from_borg","Profiles__enable_join_program_group_endpoint","Profiles__enable_public_developer_profiles","EngEduTelemetry__enable_engedu_telemetry","SignIn__enable_refresh_access_tokens","Search__enable_dynamic_content_confidential_banner","Profiles__enable_page_saving","Profiles__enable_stripe_subscription_management","CloudShell__cloud_shell_button","Cloud__enable_cloud_facet_chat","Profiles__enable_complete_playlist_endpoint","MiscFeatureFlags__enable_view_transitions","BookNav__enable_tenant_cache_key","Search__enable_page_map","Cloud__enable_free_trial_server_call","Profiles__enable_awarding_url","Analytics__enable_clearcut_logging","DevPro__enable_developer_subscriptions","Profiles__enable_dashboard_curated_recommendations","Profiles__enable_recognition_badges","Profiles__enable_completecodelab_endpoint","Concierge__enable_pushui","Profiles__enable_completequiz_endpoint","Cloud__enable_cloudx_experiment_ids","MiscFeatureFlags__enable_firebase_utm","Cloud__enable_legacy_calculator_redirect","MiscFeatureFlags__enable_explain_this_code","Cloud__enable_cloud_dlp_service","Experiments__reqs_query_experiments","DevPro__enable_cloud_innovators_plus","Profiles__require_profile_eligibility_for_signin","Cloud__enable_cloud_shell_fte_user_flow","MiscFeatureFlags__emergency_css","CloudShell__cloud_code_overflow_m
enu","MiscFeatureFlags__enable_variable_operator","MiscFeatureFlags__enable_project_variables","MiscFeatureFlags__developers_footer_dark_image","Cloud__enable_llm_concierge_chat"],null,null,"AIzaSyA58TaKli1DculwmAmbpzLVGuWc8eCQgQc","https://developerscontentserving-pa.googleapis.com","AIzaSyDWBU60w0P9hEkr29kkksYs8Z7gvZ8u_wc","https://developerscontentsearch-pa.googleapis.com",2,4,null,"https://developerprofiles-pa.googleapis.com",[15,"tensorflow","TensorFlow","www.tensorflow.org",null,"tensorflow-dot-devsite-v2-prod-3p.appspot.com",null,null,[null,1,null,null,null,null,null,null,null,null,null,[1],null,null,null,null,null,null,[1],[1,null,null,[1]],null,null,null,[1,null,1],[1,1,null,1,1]],null,[25,null,null,null,null,null,"/images/lockup.svg","/images/logo.png",null,null,null,1,1,null,null,null,null,null,null,null,null,1,null,null,null,null,[]],[],null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,[6,1],null,[[],[1,1]],[[["UA-69864048-1"],["UA-69864048-4"],null,null,["UA-69864048-5"],["GTM-MXSL34P"],null,null,[["UA-69864048-1",1]],null,[["UA-69864048-5",1]],[["GTM-MXSL34P",1]],1],[[1,1],[3,2],[12,8],[5,4],[4,3],[6,5]],[[1,1],[2,2]]],null,4],null,null,1]') </script> <devsite-a11y-announce></devsite-a11y-announce> </body> </html>