CINXE.COM
Data loading
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="description" content="Keras documentation"> <meta name="author" content="Keras Team"> <link rel="shortcut icon" href="https://keras.io/img/favicon.ico"> <link rel="canonical" href="https://keras.io/api/data_loading/" /> <!-- Social --> <meta property="og:title" content="Keras documentation: Data loading"> <meta property="og:image" content="https://keras.io/img/logo-k-keras-wb.png"> <meta name="twitter:title" content="Keras documentation: Data loading"> <meta name="twitter:image" content="https://keras.io/img/k-keras-social.png"> <meta name="twitter:card" content="summary"> <title>Data loading</title> <!-- Custom fonts for this template --> <link href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;600;700;800&display=swap" rel="stylesheet"> <link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@300;400;600;700;800&display=swap" rel="stylesheet"> <link href="https://fonts.googleapis.com/css2?family=Roboto+Mono:wght@400&display=swap" rel="stylesheet"> <!-- Bootstrap core CSS --> <link href="/css/bootstrap.min.css" rel="stylesheet"> <!-- Custom styles for this template --> <link href="/css/docs.css?v=3" rel="stylesheet"> <link href="/css/monokai.css" rel="stylesheet"> <!-- Google Tag Manager --> <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-5DNGF4N'); </script> <script> (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) 
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-175165319-128', 'auto'); ga('send', 'pageview'); </script> <!-- End Google Tag Manager --> <script async defer src="https://buttons.github.io/buttons.js"></script> <link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> </head> <body> <!-- Google Tag Manager (noscript) --> <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5DNGF4N" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript> <!-- End Google Tag Manager (noscript) --> <div class="k-page"> <div class="hidden"> None </div> <nav class="nav__container"> <div class="nav__wrapper"> <div class="nav__controls--mobile"> <button class="nav__menu--button"><i class="icon--menu"></i></button> <button class="nav__menu--close"><i class="icon--close"></i></button> <a href="/"> <img class="nav__logo nav__logo--mobile" src="/img/k-logo.png" /> </a> <button class="nav__search--mobile"> <i class="icon__search--mobile"></i> </button> </div> <form class="nav__search nav__search-form--mobile"> <input class="nav__search--input" type="search" placeholder="SEARCH" aria-label="Search" /> <button class="nav__search--button" type="submit"> <i class="icon--search"></i> </button> </form> <div class="k-nav nav__mobile-menu" id="nav-menu"> <!-- version with just the active item visible --> <div class="nav flex-column nav-pills" role="tablist" aria-orientation="vertical"> <a class="nav-link" href="/getting_started/" role="tab" aria-selected="">Getting started</a> <a class="nav-link" href="/guides/" role="tab" aria-selected="">Developer guides</a> <a class="nav-link" href="/examples/" role="tab" aria-selected="">Code examples</a> <a class="nav-link active" href="/api/" role="tab" aria-selected="">Keras 3 API documentation</a> <a class="nav-sublink" href="/api/models/">Models API</a> <a class="nav-sublink" 
href="/api/layers/">Layers API</a> <a class="nav-sublink" href="/api/callbacks/">Callbacks API</a> <a class="nav-sublink" href="/api/ops/">Ops API</a> <a class="nav-sublink" href="/api/optimizers/">Optimizers</a> <a class="nav-sublink" href="/api/metrics/">Metrics</a> <a class="nav-sublink" href="/api/losses/">Losses</a> <a class="nav-sublink active" href="/api/data_loading/">Data loading</a> <a class="nav-sublink2" href="/api/data_loading/image/">Image data loading</a> <a class="nav-sublink2" href="/api/data_loading/timeseries/">Timeseries data loading</a> <a class="nav-sublink2" href="/api/data_loading/text/">Text data loading</a> <a class="nav-sublink2" href="/api/data_loading/audio/">Audio data loading</a> <a class="nav-sublink" href="/api/datasets/">Built-in small datasets</a> <a class="nav-sublink" href="/api/applications/">Keras Applications</a> <a class="nav-sublink" href="/api/mixed_precision/">Mixed precision</a> <a class="nav-sublink" href="/api/distribution/">Multi-device distribution</a> <a class="nav-sublink" href="/api/random/">RNG API</a> <a class="nav-sublink" href="/api/rematerialization/">Rematerialization</a> <a class="nav-sublink" href="/api/utils/">Utilities</a> <a class="nav-link" href="/2/api/" role="tab" aria-selected="">Keras 2 API documentation</a> <a class="nav-link" href="/keras_tuner/" role="tab" aria-selected="">KerasTuner: Hyperparam Tuning</a> <a class="nav-link" href="/keras_hub/" role="tab" aria-selected="">KerasHub: Pretrained Models</a> </div> </div> <a href="/"> <img class="nav__logo nav__logo--desktop" src="/img/logo.png" alt="keras.io logo" /> </a> <div class="nav__menu"> <ul class="nav__item--container"> <li class="nav__item"> <a class="nav__link" href="/getting_started/">Get started</a> </li> <li class="nav__item"> <a class="nav__link" href="/guides/">Guides</a> </li> <li class="nav__item"> <a class="nav__link nav__link--active" href="/api/">API Docs</a> </li> <li class="nav__item"> <a class="nav__link" 
href="/examples/">Examples</a> </li> <li class="nav__item"> <a class="nav__link" href="/keras_tuner/">Keras Tuner</a> </li> <li class="nav__item"> <a class="nav__link" href="/keras_hub/">Keras Hub</a> </li> </ul> <form class="nav__search"> <input class="nav__search--input" type="search" placeholder="SEARCH" aria-label="Search" /> <button class="nav__search--button" type="submit"> <i class="icon--search"></i> </button> </form> </div> </div> </nav> <div class="page__container flex__container"> <div class="nav__side-nav" id="nav-menu"> <div class="nav flex-column nav-pills" role="tablist" aria-orientation="vertical"> <a class="nav-link active" href="/api/" role="tab" aria-selected=""> Keras 3 API documentation </a> <div class="nav-expanded-panel"> <a class="nav-sublink" href="/api/models/">Models API</a> <a class="nav-sublink" href="/api/layers/">Layers API</a> <a class="nav-sublink" href="/api/callbacks/">Callbacks API</a> <a class="nav-sublink" href="/api/ops/">Ops API</a> <a class="nav-sublink" href="/api/optimizers/">Optimizers</a> <a class="nav-sublink" href="/api/metrics/">Metrics</a> <a class="nav-sublink" href="/api/losses/">Losses</a> <a class="nav-sublink active" href="/api/data_loading/">Data loading</a> <a class="nav-sublink2" href="/api/data_loading/image/">Image data loading</a> <a class="nav-sublink2" href="/api/data_loading/timeseries/">Timeseries data loading</a> <a class="nav-sublink2" href="/api/data_loading/text/">Text data loading</a> <a class="nav-sublink2" href="/api/data_loading/audio/">Audio data loading</a> <a class="nav-sublink" href="/api/datasets/">Built-in small datasets</a> <a class="nav-sublink" href="/api/applications/">Keras Applications</a> <a class="nav-sublink" href="/api/mixed_precision/">Mixed precision</a> <a class="nav-sublink" href="/api/distribution/">Multi-device distribution</a> <a class="nav-sublink" href="/api/random/">RNG API</a> <a class="nav-sublink" href="/api/rematerialization/">Rematerialization</a> <a 
class="nav-sublink" href="/api/utils/">Utilities</a> </div> <a class="nav-link" href="/2/api/" role="tab" aria-selected=""> Keras 2 API documentation </a> </div> </div> <div class="k-main"> <div class='k-main-inner' id='k-main-id'> <div class='k-content'> <div class='k-location-slug'> <span class="k-location-slug-pointer">►</span> <a href='/api/'>Keras 3 API documentation</a> / Data loading </div> <h1 id="data-loading">Data loading</h1> <p>Keras data loading utilities, located in <code>keras.utils</code>, help you go from raw data on disk to a <a href="https://www.tensorflow.org/api_docs/python/tf/data/Dataset"><code>tf.data.Dataset</code></a> object that can be used to efficiently train a model.</p> <p>These loading utilities can be combined with <a href="https://keras.io/api/layers/preprocessing_layers/">preprocessing layers</a> to further transform your input dataset before training.</p> <p>Here's a quick example: let's say you have 10 folders, each containing 10,000 images from a different category, and you want to train a classifier that maps an image to its category.</p> <p>Your training data folder would look like this:</p> <div class="codehilite"><pre><span></span><code>training_data/ ...class_a/ ......a_image_1.jpg ......a_image_2.jpg ...class_b/ ......b_image_1.jpg ......b_image_2.jpg etc. 
</code></pre></div> <p>You may also have a validation data folder <code>validation_data/</code> structured in the same way.</p> <p>You could simply do:</p> <div class="codehilite"><pre><span></span><code><span class="kn">import</span><span class="w"> </span><span class="nn">keras</span> <span class="n">train_ds</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">image_dataset_from_directory</span><span class="p">(</span> <span class="n">directory</span><span class="o">=</span><span class="s1">'training_data/'</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="s1">'inferred'</span><span class="p">,</span> <span class="n">label_mode</span><span class="o">=</span><span class="s1">'categorical'</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">image_size</span><span class="o">=</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">256</span><span class="p">))</span> <span class="n">validation_ds</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">image_dataset_from_directory</span><span class="p">(</span> <span class="n">directory</span><span class="o">=</span><span class="s1">'validation_data/'</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="s1">'inferred'</span><span class="p">,</span> <span class="n">label_mode</span><span class="o">=</span><span class="s1">'categorical'</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">image_size</span><span class="o">=</span><span class="p">(</span><span class="mi">256</span><span 
class="p">,</span> <span class="mi">256</span><span class="p">))</span> <span class="n">model</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">applications</span><span class="o">.</span><span class="n">Xception</span><span class="p">(</span> <span class="n">weights</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">input_shape</span><span class="o">=</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="n">classes</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span> <span class="n">model</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">optimizer</span><span class="o">=</span><span class="s1">'rmsprop'</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'categorical_crossentropy'</span><span class="p">)</span> <span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train_ds</span><span class="p">,</span> <span class="n">epochs</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">validation_data</span><span class="o">=</span><span class="n">validation_ds</span><span class="p">)</span> </code></pre></div> <h2 id="available-dataset-loading-utilities">Available dataset loading utilities</h2> <h3 id="image-data-loading"><a href="/api/data_loading/image/">Image data loading</a></h3> <ul> <li><a href="/api/data_loading/image/#image_dataset_from_directory-function">image_dataset_from_directory function</a></li> <li><a href="/api/data_loading/image/#load_img-function">load_img function</a></li> <li><a href="/api/data_loading/image/#img_to_array-function">img_to_array function</a></li> <li><a 
href="/api/data_loading/image/#save_img-function">save_img function</a></li> <li><a href="/api/data_loading/image/#array_to_img-function">array_to_img function</a></li> </ul> <h3 id="timeseries-data-loading"><a href="/api/data_loading/timeseries/">Timeseries data loading</a></h3> <ul> <li><a href="/api/data_loading/timeseries/#timeseries_dataset_from_array-function">timeseries_dataset_from_array function</a></li> <li><a href="/api/data_loading/timeseries/#pad_sequences-function">pad_sequences function</a></li> </ul> <h3 id="text-data-loading"><a href="/api/data_loading/text/">Text data loading</a></h3> <ul> <li><a href="/api/data_loading/text/#text_dataset_from_directory-function">text_dataset_from_directory function</a></li> </ul> <h3 id="audio-data-loading"><a href="/api/data_loading/audio/">Audio data loading</a></h3> <ul> <li><a href="/api/data_loading/audio/#audio_dataset_from_directory-function">audio_dataset_from_directory function</a></li> </ul> </div> <div class='k-outline'> <div class='k-outline-depth-1'> <a href='#data-loading'>Data loading</a> </div> <div class='k-outline-depth-2'> <a href='#available-dataset-loading-utilities'>Available dataset loading utilities</a> </div> <div class='k-outline-depth-3'> <a href='#image-data-loading'>Image data loading</a> </div> <div class='k-outline-depth-3'> <a href='#timeseries-data-loading'>Timeseries data loading</a> </div> <div class='k-outline-depth-3'> <a href='#text-data-loading'>Text data loading</a> </div> <div class='k-outline-depth-3'> <a href='#audio-data-loading'>Audio data loading</a> </div> </div> </div> </div> </div> </div> <footer> <div class="footer__container"> <a href="https://policies.google.com/terms">Terms</a> <div>|</div> <a href="https://policies.google.com/privacy">Privacy</a> </div> </footer> <script src="/js/index.js"></script> </body> </html>