CINXE.COM

Optimizers

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="description" content="Keras documentation"> <meta name="author" content="Keras Team"> <link rel="shortcut icon" href="https://keras.io/img/favicon.ico"> <link rel="canonical" href="https://keras.io/api/optimizers/" /> <!-- Social --> <meta property="og:title" content="Keras documentation: Optimizers"> <meta property="og:image" content="https://keras.io/img/logo-k-keras-wb.png"> <meta name="twitter:title" content="Keras documentation: Optimizers"> <meta name="twitter:image" content="https://keras.io/img/k-keras-social.png"> <meta name="twitter:card" content="summary"> <title>Optimizers</title> <!-- Bootstrap core CSS --> <link href="/css/bootstrap.min.css" rel="stylesheet"> <!-- Custom fonts for this template --> <link href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;600;700;800&display=swap" rel="stylesheet"> <!-- Custom styles for this template --> <link href="/css/docs.css" rel="stylesheet"> <link href="/css/monokai.css" rel="stylesheet"> <!-- Google Tag Manager --> <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-5DNGF4N'); </script> <script> (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-175165319-128', 'auto'); ga('send', 'pageview'); </script> <!-- End Google Tag Manager --> <script async defer 
src="https://buttons.github.io/buttons.js"></script> </head> <body> <!-- Google Tag Manager (noscript) --> <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5DNGF4N" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript> <!-- End Google Tag Manager (noscript) --> <div class='k-page'> <div class="k-nav" id="nav-menu"> <a href='/'><img src='/img/logo-small.png' class='logo-small' /></a> <div class="nav flex-column nav-pills" role="tablist" aria-orientation="vertical"> <a class="nav-link" href="/about/" role="tab" aria-selected="">About Keras</a> <a class="nav-link" href="/getting_started/" role="tab" aria-selected="">Getting started</a> <a class="nav-link" href="/guides/" role="tab" aria-selected="">Developer guides</a> <a class="nav-link active" href="/api/" role="tab" aria-selected="">Keras 3 API documentation</a> <a class="nav-sublink" href="/api/models/">Models API</a> <a class="nav-sublink" href="/api/layers/">Layers API</a> <a class="nav-sublink" href="/api/callbacks/">Callbacks API</a> <a class="nav-sublink" href="/api/ops/">Ops API</a> <a class="nav-sublink active" href="/api/optimizers/">Optimizers</a> <a class="nav-sublink2" href="/api/optimizers/sgd/">SGD</a> <a class="nav-sublink2" href="/api/optimizers/rmsprop/">RMSprop</a> <a class="nav-sublink2" href="/api/optimizers/adam/">Adam</a> <a class="nav-sublink2" href="/api/optimizers/adamw/">AdamW</a> <a class="nav-sublink2" href="/api/optimizers/adadelta/">Adadelta</a> <a class="nav-sublink2" href="/api/optimizers/adagrad/">Adagrad</a> <a class="nav-sublink2" href="/api/optimizers/adamax/">Adamax</a> <a class="nav-sublink2" href="/api/optimizers/adafactor/">Adafactor</a> <a class="nav-sublink2" href="/api/optimizers/Nadam/">Nadam</a> <a class="nav-sublink2" href="/api/optimizers/ftrl/">Ftrl</a> <a class="nav-sublink2" href="/api/optimizers/lion/">Lion</a> <a class="nav-sublink2" href="/api/optimizers/lamb/">Lamb</a> <a class="nav-sublink2" 
href="/api/optimizers/loss_scale_optimizer/">Loss Scale Optimizer</a>
<a class="nav-sublink2" href="/api/optimizers/learning_rate_schedules/">Learning rate schedules API</a>
<a class="nav-sublink" href="/api/metrics/">Metrics</a>
<a class="nav-sublink" href="/api/losses/">Losses</a>
<a class="nav-sublink" href="/api/data_loading/">Data loading</a>
<a class="nav-sublink" href="/api/datasets/">Built-in small datasets</a>
<a class="nav-sublink" href="/api/applications/">Keras Applications</a>
<a class="nav-sublink" href="/api/mixed_precision/">Mixed precision</a>
<a class="nav-sublink" href="/api/distribution/">Multi-device distribution</a>
<a class="nav-sublink" href="/api/random/">RNG API</a>
<a class="nav-sublink" href="/api/utils/">Utilities</a>
<a class="nav-sublink" href="/api/keras_tuner/">KerasTuner</a>
<a class="nav-sublink" href="/api/keras_cv/">KerasCV</a>
<a class="nav-sublink" href="/api/keras_nlp/">KerasNLP</a>
<a class="nav-sublink" href="/api/keras_hub/">KerasHub</a>
<a class="nav-link" href="/2.18/api/" role="tab" aria-selected="">Keras 2 API documentation</a>
<a class="nav-link" href="/examples/" role="tab" aria-selected="">Code examples</a>
<a class="nav-link" href="/keras_tuner/" role="tab" aria-selected="">KerasTuner: Hyperparameter Tuning</a>
<a class="nav-link" href="/keras_hub/" role="tab" aria-selected="">KerasHub: Pretrained Models</a>
<a class="nav-link" href="/keras_cv/" role="tab" aria-selected="">KerasCV: Computer Vision Workflows</a>
<a class="nav-link" href="/keras_nlp/" role="tab" aria-selected="">KerasNLP: Natural Language Workflows</a>
</div>
</div>
<div class='k-main'>
<div class='k-main-top'>
<script>
  // Toggles the side navigation ("nav-menu") when the hamburger icon
  // ("dropdown-nav") is clicked. Invoked from the inline onclick attribute
  // below, so the function name must stay global.
  function displayDropdownMenu() {
    // Declared locally: the previous bare assignment created an implicit
    // global named `e` (and would throw in strict mode).
    var navMenu = document.getElementById("nav-menu");
    if (navMenu.style.display == "block") {
      navMenu.style.display = "none";
    } else {
      navMenu.style.display = "block";
      document.getElementById("dropdown-nav").style.display = "block";
    }
  }

  // Switches between the mobile layout (viewport width <= 840px: menu hidden,
  // hamburger shown) and the desktop layout (menu shown, hamburger hidden),
  // then sets the main column's min-height to the menu height plus 100px.
  function resetMobileUI() {
    if (window.innerWidth <= 840) {
      document.getElementById("nav-menu").style.display = "none";
      document.getElementById("dropdown-nav").style.display = "block";
    } else {
      document.getElementById("nav-menu").style.display = "block";
      document.getElementById("dropdown-nav").style.display = "none";
    }
    // clientHeight is read after the display value has been set above.
    var navmenu = document.getElementById("nav-menu");
    var menuheight = navmenu.clientHeight;
    var kmain = document.getElementById("k-main-id");
    kmain.style.minHeight = (menuheight + 100) + 'px';
  }

  // Re-evaluate the layout on every resize and once after the page has loaded.
  window.onresize = resetMobileUI;
  window.addEventListener("load", (event) => {
    resetMobileUI()
  });
</script>
<div id='dropdown-nav' onclick="displayDropdownMenu();">
  <svg viewBox="-20 -20 120 120" width="60" height="60">
    <rect width="100" height="20"></rect>
    <rect y="30" width="100" height="20"></rect>
    <rect y="60" width="100" height="20"></rect>
  </svg>
</div>
<form class="bd-search d-flex align-items-center k-search-form" id="search-form">
<input type="search" class="k-search-input" id="search-input" placeholder="Search Keras documentation..." aria-label="Search Keras documentation..."
autocomplete="off">
<button type="submit" class="k-search-btn">
  <svg width="13" height="13" viewBox="0 0 13 13"><title>search</title><path d="m4.8495 7.8226c0.82666 0 1.5262-0.29146 2.0985-0.87438 0.57232-0.58292 0.86378-1.2877 0.87438-2.1144 0.010599-0.82666-0.28086-1.5262-0.87438-2.0985-0.59352-0.57232-1.293-0.86378-2.0985-0.87438-0.8055-0.010599-1.5103 0.28086-2.1144 0.87438-0.60414 0.59352-0.8956 1.293-0.87438 2.0985 0.021197 0.8055 0.31266 1.5103 0.87438 2.1144 0.56172 0.60414 1.2665 0.8956 2.1144 0.87438zm4.4695 0.2115 3.681 3.6819-1.259 1.284-3.6817-3.7 0.0019784-0.69479-0.090043-0.098846c-0.87973 0.76087-1.92 1.1413-3.1207 1.1413-1.3553 0-2.5025-0.46363-3.4417-1.3909s-1.4088-2.0686-1.4088-3.4239c0-1.3553 0.4696-2.4966 1.4088-3.4239 0.9392-0.92727 2.0864-1.3969 3.4417-1.4088 1.3553-0.011889 2.4906 0.45771 3.406 1.4088 0.9154 0.95107 1.379 2.0924 1.3909 3.4239 0 1.2126-0.38043 2.2588-1.1413 3.1385l0.098834 0.090049z"></path></svg>
</button>
</form>
<script>
  // Intercepts submission of the search form and navigates to the site
  // search page with the typed text as the `query` URL parameter.
  var form = document.getElementById('search-form');
  form.onsubmit = function(e) {
    e.preventDefault();
    var query = document.getElementById('search-input').value;
    // Percent-encode the user's text so characters such as `&`, `#` or `+`
    // stay part of the query value instead of altering the URL structure.
    window.location.href = '/search.html?query=' + encodeURIComponent(query);
    // Lowercase `false`: `False` is not defined in JavaScript and raised a
    // ReferenceError on every submit.
    return false;
  }
</script>
</div>
<div class='k-main-inner' id='k-main-id'>
<div class='k-location-slug'> <span class="k-location-slug-pointer">►</span> <a href='/api/'>Keras 3 API documentation</a> / Optimizers </div>
<div class='k-content'>
<h1 id="optimizers">Optimizers</h1>
<h2 id="available-optimizers">Available optimizers</h2>
<ul>
<li><a href="/api/optimizers/sgd/">SGD</a></li>
<li><a href="/api/optimizers/rmsprop/">RMSprop</a></li>
<li><a href="/api/optimizers/adam/">Adam</a></li>
<li><a href="/api/optimizers/adamw/">AdamW</a></li>
<li><a href="/api/optimizers/adadelta/">Adadelta</a></li>
<li><a href="/api/optimizers/adagrad/">Adagrad</a></li>
<li><a href="/api/optimizers/adamax/">Adamax</a></li>
<li><a href="/api/optimizers/adafactor/">Adafactor</a></li>
<li><a
href="/api/optimizers/Nadam/">Nadam</a></li> <li><a href="/api/optimizers/ftrl/">Ftrl</a></li> <li><a href="/api/optimizers/lion/">Lion</a></li> <li><a href="/api/optimizers/lamb/">Lamb</a></li> <li><a href="/api/optimizers/loss_scale_optimizer/">Loss Scale Optimizer</a></li> </ul> <hr /> <h2 id="usage-with-compile-amp-fit">Usage with <code>compile()</code> &amp; <code>fit()</code></h2> <p>An optimizer is one of the two arguments required for compiling a Keras model:</p> <div class="codehilite"><pre><span></span><code><span class="kn">import</span> <span class="nn">keras</span> <span class="kn">from</span> <span class="nn">keras</span> <span class="kn">import</span> <span class="n">layers</span> <span class="n">model</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">Sequential</span><span class="p">()</span> <span class="n">model</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">layers</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="n">kernel_initializer</span><span class="o">=</span><span class="s1">&#39;uniform&#39;</span><span class="p">,</span> <span class="n">input_shape</span><span class="o">=</span><span class="p">(</span><span class="mi">10</span><span class="p">,)))</span> <span class="n">model</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">layers</span><span class="o">.</span><span class="n">Activation</span><span class="p">(</span><span class="s1">&#39;softmax&#39;</span><span class="p">))</span> <span class="n">opt</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">optimizers</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.01</span><span class="p">)</span> 
<span class="n">model</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">loss</span><span class="o">=</span><span class="s1">&#39;categorical_crossentropy&#39;</span><span class="p">,</span> <span class="n">optimizer</span><span class="o">=</span><span class="n">opt</span><span class="p">)</span> </code></pre></div> <p>You can either instantiate an optimizer before passing it to <code>model.compile()</code> , as in the above example, or you can pass it by its string identifier. In the latter case, the default parameters for the optimizer will be used.</p> <div class="codehilite"><pre><span></span><code><span class="c1"># pass optimizer by name: default parameters will be used</span> <span class="n">model</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">loss</span><span class="o">=</span><span class="s1">&#39;categorical_crossentropy&#39;</span><span class="p">,</span> <span class="n">optimizer</span><span class="o">=</span><span class="s1">&#39;adam&#39;</span><span class="p">)</span> </code></pre></div> <hr /> <h2 id="learning-rate-decay--scheduling">Learning rate decay / scheduling</h2> <p>You can use a <a href="/api/optimizers/learning_rate_schedules">learning rate schedule</a> to modulate how the learning rate of your optimizer changes over time:</p> <div class="codehilite"><pre><span></span><code><span class="n">lr_schedule</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">optimizers</span><span class="o">.</span><span class="n">schedules</span><span class="o">.</span><span class="n">ExponentialDecay</span><span class="p">(</span> <span class="n">initial_learning_rate</span><span class="o">=</span><span class="mf">1e-2</span><span class="p">,</span> <span class="n">decay_steps</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">decay_rate</span><span 
class="o">=</span><span class="mf">0.9</span><span class="p">)</span> <span class="n">optimizer</span> <span class="o">=</span> <span class="n">keras</span><span class="o">.</span><span class="n">optimizers</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="n">lr_schedule</span><span class="p">)</span> </code></pre></div> <p>Check out <a href="/api/optimizers/learning_rate_schedules">the learning rate schedule API documentation</a> for a list of available schedules.</p> <hr /> <h2 id="base-optimizer-api">Base Optimizer API</h2> <p>These methods and attributes are common to all Keras optimizers.</p> <p><span style="float:right;"><a href="https://github.com/keras-team/keras/tree/v3.6.0/keras/src/optimizers/optimizer.py#L21">[source]</a></span></p> <h3 id="optimizer-class"><code>Optimizer</code> class</h3> <div class="codehilite"><pre><span></span><code><span class="n">keras</span><span class="o">.</span><span class="n">optimizers</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">()</span> </code></pre></div> <p>Abstract optimizer base class.</p> <p>If you intend to create your own optimization algorithm, please inherit from this class and override the following methods:</p> <ul> <li><code>build</code>: Create your optimizer-related variables, such as momentum variables in the SGD optimizer.</li> <li><code>update_step</code>: Implement your optimizer's variable updating logic.</li> <li><code>get_config</code>: serialization of the optimizer.</li> </ul> <p><strong>Example</strong></p> <div class="codehilite"><pre><span></span><code><span class="k">class</span> <span class="nc">SGD</span><span class="p">(</span><span class="n">Optimizer</span><span class="p">):</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span 
class="n">kwargs</span><span class="p">):</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">momentum</span> <span class="o">=</span> <span class="mf">0.9</span> <span class="k">def</span> <span class="nf">build</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">variables</span><span class="p">):</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">variables</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">momentums</span> <span class="o">=</span> <span class="p">[]</span> <span class="k">for</span> <span class="n">variable</span> <span class="ow">in</span> <span class="n">variables</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">momentums</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_variable_from_reference</span><span class="p">(</span> <span class="n">reference_variable</span><span class="o">=</span><span class="n">variable</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;momentum&quot;</span> <span class="p">)</span> <span class="p">)</span> <span class="k">def</span> <span class="nf">update_step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">gradient</span><span class="p">,</span> <span class="n">variable</span><span class="p">,</span> <span class="n">learning_rate</span><span class="p">):</span> <span class="n">learning_rate</span> <span class="o">=</span> <span 
class="n">ops</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">learning_rate</span><span class="p">,</span> <span class="n">variable</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">gradient</span> <span class="o">=</span> <span class="n">ops</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">gradient</span><span class="p">,</span> <span class="n">variable</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">m</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">momentums</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_variable_index</span><span class="p">(</span><span class="n">variable</span><span class="p">)]</span> <span class="bp">self</span><span class="o">.</span><span class="n">assign</span><span class="p">(</span> <span class="n">m</span><span class="p">,</span> <span class="n">ops</span><span class="o">.</span><span class="n">subtract</span><span class="p">(</span> <span class="n">ops</span><span class="o">.</span><span class="n">multiply</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">ops</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">momentum</span><span class="p">,</span> <span class="n">variable</span><span class="o">.</span><span class="n">dtype</span><span class="p">)),</span> <span class="n">ops</span><span class="o">.</span><span class="n">multiply</span><span class="p">(</span><span class="n">gradient</span><span class="p">,</span> <span class="n">learning_rate</span><span class="p">),</span> <span class="p">),</span> <span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">assign_add</span><span class="p">(</span><span class="n">variable</span><span class="p">,</span> <span class="n">m</span><span class="p">)</span> <span class="k">def</span> <span class="nf">get_config</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">config</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_config</span><span class="p">()</span> <span class="n">config</span><span class="o">.</span><span class="n">update</span><span class="p">(</span> <span class="p">{</span> <span class="s2">&quot;momentum&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">momentum</span><span class="p">,</span> <span class="s2">&quot;nesterov&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">nesterov</span><span class="p">,</span> <span class="p">}</span> <span class="p">)</span> <span class="k">return</span> <span class="n">config</span> </code></pre></div> <hr /> <p><span style="float:right;"><a href="https://github.com/keras-team/keras/tree/v3.6.0/keras/src/optimizers/base_optimizer.py#L342">[source]</a></span></p> <h3 id="applygradients-method"><code>apply_gradients</code> method</h3> <div class="codehilite"><pre><span></span><code><span class="n">Optimizer</span><span class="o">.</span><span class="n">apply_gradients</span><span class="p">(</span><span class="n">grads_and_vars</span><span class="p">)</span> </code></pre></div> <hr /> <h3 id="variables-property"><code>variables</code> property</h3> <div class="codehilite"><pre><span></span><code><span class="n">keras</span><span class="o">.</span><span class="n">optimizers</span><span class="o">.</span><span class="n">Optimizer</span><span class="o">.</span><span class="n">variables</span> </code></pre></div> <hr /> </div> <div class='k-outline'> <div class='k-outline-depth-1'> <a 
href='#optimizers'>Optimizers</a> </div>
<div class='k-outline-depth-2'> ◆ <a href='#available-optimizers'>Available optimizers</a> </div>
<div class='k-outline-depth-2'> ◆ <a href='#usage-with-compile-amp-fit'>Usage with <code>compile()</code> &amp; <code>fit()</code></a> </div>
<div class='k-outline-depth-2'> ◆ <a href='#learning-rate-decay--scheduling'>Learning rate decay / scheduling</a> </div>
<div class='k-outline-depth-2'> ◆ <a href='#base-optimizer-api'>Base Optimizer API</a> </div>
<div class='k-outline-depth-3'> <a href='#optimizer-class'><code>Optimizer</code> class</a> </div>
<div class='k-outline-depth-3'> <a href='#applygradients-method'><code>apply_gradients</code> method</a> </div>
<div class='k-outline-depth-3'> <a href='#variables-property'><code>variables</code> property</a> </div>
</div>
</div>
</div>
</div>
<!-- The footer is flow content and must be inside <body>; it previously followed </body>. -->
<footer style="float: left; width: 100%; padding: 1em; border-top: solid 1px #bbb;"> <a href="https://policies.google.com/terms">Terms</a> | <a href="https://policies.google.com/privacy">Privacy</a> </footer>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10