CINXE.COM

<!DOCTYPE html> <html lang="en" data-content_root="" > <head> <meta charset="utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>Convert PyTorch models to Flax</title> <script data-cfasync="false"> document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; </script>  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" /> <link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" /> <link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" /> <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" /> <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" /> <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" /> <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" /> <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" /> <link rel="stylesheet" type="text/css" href="../../_static/styles/sphinx-book-theme.css" /> <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" /> <link rel="stylesheet" type="text/css" href="../../_static/sphinx-design.5ea377869091fd0449014c60fc090103.min.css" /> <link rel="stylesheet" type="text/css" href="../../_static/css/flax_theme.css" />  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" /> <link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" /> <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script> <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script> <script src="../../_static/jquery.js"></script> <script src="../../_static/underscore.js"></script> <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script> <script src="../../_static/doctools.js"></script> <script src="../../_static/sphinx_highlight.js"></script> <script src="../../_static/scripts/sphinx-book-theme.js"></script> <script src="../../_static/design-tabs.js"></script> <script>DOCUMENTATION_OPTIONS.pagename = 'guides/converting_and_upgrading/convert_pytorch_to_flax';</script> <link rel="shortcut icon" href="../../_static/flax.png"/> <link rel="index" title="Index" href="../../genindex.html" /> <link rel="search" title="Search" href="../../search.html" /> <link rel="next" title="Migrate checkpointing to Orbax" href="orbax_upgrade_guide.html" /> <link rel="prev" title="Migrating from Haiku to Flax" href="haiku_migration_guide.html" /> <meta name="viewport" content="width=device-width, initial-scale=1"/> <meta name="docsearch:language" content="en"/> <script async type="text/javascript" src="/_/static/javascript/readthedocs-addons.js"></script><meta name="readthedocs-project-slug" content="flax-linen" /><meta name="readthedocs-version-slug" content="latest" /><meta name="readthedocs-resolver-filename" content="/guides/converting_and_upgrading/convert_pytorch_to_flax.html" /><meta name="readthedocs-http-status" content="200" /></head> <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> <div id="pst-scroll-pixel-helper"></div> <button type="button" class="btn rounded-pill" id="pst-back-to-top"> Back to top</button> <input type="checkbox" class="sidebar-toggle" id="pst-primary-sidebar-checkbox"/> <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label> <input type="checkbox" class="sidebar-toggle" id="pst-secondary-sidebar-checkbox"/> <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label> <div class="search-button__wrapper"> <div class="search-button__overlay"></div> <div class="search-button__search-container"> <form class="bd-search d-flex align-items-center" action="../../search.html" method="get"> <input type="search" class="form-control" name="q" id="search-input" placeholder="Search..." aria-label="Search..." autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/> <kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd> </form></div> </div> <div class="pst-async-banner-revealer d-none"> <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> </div> <aside class="bd-header-announcement" aria-label="Announcement"> <div class="bd-header-announcement__content"> <a href="https://flax.readthedocs.io/en/latest/index.html" style="text-decoration: none; color: white;" > This site covers the old Flax Linen API. [Explore the new Flax NNX API ✨] </a> </div> </aside> <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> </header> <div class="bd-container"> <div class="bd-container__inner bd-page-width"> <div class="bd-sidebar-primary bd-sidebar"> <div class="sidebar-header-items sidebar-primary__section"> </div> <div class="sidebar-primary-items__start sidebar-primary__section"> <div class="sidebar-primary-item"> <a class="navbar-brand logo" href="../../index.html"> <img src="../../_static/flax.png" class="logo__image only-light" alt=" - Home"/> <script>document.write(`<img src="../../_static/flax.png" class="logo__image only-dark" alt=" - Home"/>`);</script> </a></div> <div class="sidebar-primary-item"> <script> document.write(` <button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> Search <kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd> </button> `); </script></div> <div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main"> <div class="bd-toc-item navbar-nav active"> <ul class="current nav bd-sidenav"> <li class="toctree-l1"><a class="reference internal" href="../../quick_start.html">Quick start</a></li> <li class="toctree-l1"><a class="reference internal" href="../flax_fundamentals/flax_basics.html">Flax Basics</a></li> <li class="toctree-l1 current active has-children"><a class="reference internal" href="../index.html">Guides</a><details open="open"><summary></summary><ul class="current"> <li class="toctree-l2 has-children"><a class="reference internal" href="../flax_fundamentals/index.html">Flax fundamentals</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference external" href="https://jax.readthedocs.io/en/latest/jax-101/index.html">JAX 101</a></li> <li class="toctree-l3"><a class="reference internal" href="../flax_fundamentals/flax_basics.html">Flax Basics</a></li> <li class="toctree-l3"><a class="reference internal" href="../flax_fundamentals/state_params.html">Managing Parameters and State</a></li> <li class="toctree-l3"><a class="reference internal" href="../flax_fundamentals/setup_or_nncompact.html"><code class="docutils literal notranslate">setup</code> vs <code class="docutils literal notranslate">compact</code></a></li> <li class="toctree-l3"><a class="reference internal" href="../flax_fundamentals/arguments.html">Dealing with Flax Module arguments</a></li> <li class="toctree-l3"><a class="reference internal" href="../flax_fundamentals/rng_guide.html">Randomness and PRNGs in Flax</a></li> </ul> </details></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../data_preprocessing/index.html">Data preprocessing</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../data_preprocessing/full_eval.html">Processing the entire Dataset</a></li> <li class="toctree-l3"><a class="reference internal" href="../data_preprocessing/loading_datasets.html">Loading datasets</a></li> </ul> </details></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../training_techniques/index.html">Training techniques</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../training_techniques/batch_norm.html">Batch normalization</a></li> <li class="toctree-l3"><a class="reference internal" href="../training_techniques/dropout.html">Dropout</a></li> <li class="toctree-l3"><a class="reference internal" href="../training_techniques/lr_schedule.html">Learning rate scheduling</a></li> <li class="toctree-l3"><a class="reference internal" href="../training_techniques/transfer_learning.html">Transfer learning</a></li> <li class="toctree-l3"><a class="reference internal" href="../training_techniques/use_checkpointing.html">Save and load checkpoints</a></li> </ul> </details></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../parallel_training/index.html">Parallel training</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../parallel_training/ensembling.html">Ensembling on multiple devices</a></li> <li class="toctree-l3"><a class="reference internal" href="../parallel_training/flax_on_pjit.html">Scale up Flax Modules on multiple devices</a></li> </ul> </details></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../model_inspection/index.html">Model inspection</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../model_inspection/model_surgery.html">Model surgery</a></li> <li class="toctree-l3"><a class="reference internal" href="../model_inspection/extracting_intermediates.html">Extracting intermediate values</a></li> </ul> </details></li> <li class="toctree-l2 current active has-children"><a class="reference internal" href="index.html">Converting and upgrading</a><details open="open"><summary></summary><ul class="current"> <li class="toctree-l3"><a class="reference internal" href="haiku_migration_guide.html">Migrating from Haiku to Flax</a></li> <li class="toctree-l3 current active"><a class="current reference internal" href="#">Convert PyTorch models to Flax</a></li> <li class="toctree-l3"><a class="reference internal" href="orbax_upgrade_guide.html">Migrate checkpointing to Orbax</a></li> <li class="toctree-l3"><a class="reference internal" href="optax_update_guide.html">Upgrading my codebase to Optax</a></li> <li class="toctree-l3"><a class="reference internal" href="linen_upgrade_guide.html">Upgrading my codebase to Linen</a></li> <li class="toctree-l3"><a class="reference internal" href="rnncell_upgrade_guide.html">RNNCellBase Upgrade Guide</a></li> <li class="toctree-l3"><a class="reference internal" href="regular_dict_upgrade_guide.html">Migrate to regular dicts</a></li> </ul> </details></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../quantization/index.html">Quantization</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../quantization/fp8_basics.html">User Guide on Using FP8</a></li> </ul> </details></li> <li class="toctree-l2"><a class="reference internal" href="../flax_sharp_bits.html">The Sharp Bits</a></li> </ul> </details></li> <li class="toctree-l1 has-children"><a class="reference internal" href="../../examples/index.html">Examples</a><details><summary></summary><ul> <li class="toctree-l2"><a class="reference internal" href="../../examples/core_examples.html">Core examples</a></li> <li class="toctree-l2"><a class="reference internal" href="../../examples/google_research_examples.html">Google Research examples</a></li> <li class="toctree-l2"><a class="reference internal" href="../../examples/repositories_that_use_flax.html">Repositories that use Flax</a></li> <li class="toctree-l2"><a class="reference internal" href="../../examples/community_examples.html">Community examples</a></li> </ul> </details></li> <li class="toctree-l1"><a class="reference internal" href="../../glossary.html">Glossary</a></li> <li class="toctree-l1"><a class="reference internal" href="../../faq.html">Frequently Asked Questions (FAQ)</a></li> <li class="toctree-l1 has-children"><a class="reference internal" href="../../developer_notes/index.html">Developer notes</a><details><summary></summary><ul> <li class="toctree-l2"><a class="reference internal" href="../../developer_notes/module_lifecycle.html">The Flax Module lifecycle</a></li> <li class="toctree-l2"><a class="reference internal" href="../../developer_notes/lift.html">Lifted transformations</a></li> <li class="toctree-l2"><a class="reference external" href="https://github.com/google/flax/tree/main/docs/flip">FLIPs</a></li> </ul> </details></li> <li class="toctree-l1"><a class="reference internal" href="../../philosophy.html">The Flax philosophy</a></li> <li class="toctree-l1"><a class="reference internal" href="../../contributing.html">How to contribute</a></li> <li class="toctree-l1 has-children"><a class="reference internal" href="../../api_reference/index.html">API Reference</a><details><summary></summary><ul> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.config.html">flax.config package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.core.frozen_dict.html">flax.core.frozen_dict package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.cursor.html">flax.cursor package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.errors.html">flax.errors package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.jax_utils.html">flax.jax_utils package</a></li> <li class="toctree-l2 has-children"><a class="reference internal" href="../../api_reference/flax.linen/index.html">flax.linen</a><details><summary></summary><ul> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/module.html">Module</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/init_apply.html">Init/Apply</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/layers.html">Layers</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/activation_functions.html">Activation functions</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/initializers.html">Initializers</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/transformations.html">Transformations</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/inspection.html">Inspection</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/variable.html">Variable dictionary</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/spmd.html">SPMD</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/decorators.html">Decorators</a></li> <li class="toctree-l3"><a class="reference internal" href="../../api_reference/flax.linen/profiling.html">Profiling</a></li> </ul> </details></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.serialization.html">flax.serialization package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.struct.html">flax.struct package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.traceback_util.html">flax.traceback_util package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.training.html">flax.training package</a></li> <li class="toctree-l2"><a class="reference internal" href="../../api_reference/flax.traverse_util.html">flax.traverse_util package</a></li> </ul> </details></li> <li class="toctree-l1"><a class="reference external" href="https://flax.readthedocs.io/en/latest/index.html">Flax NNX</a></li> </ul> </div> </nav></div> </div> <div class="sidebar-primary-items__end sidebar-primary__section"> </div> <div id="rtd-footer-container"></div> </div> <main id="main-content" class="bd-main" role="main"> <div class="sbt-scroll-pixel-helper"></div> <div class="bd-content"> <div class="bd-article-container"> <div class="bd-header-article d-print-none"> <div class="header-article-items header-article__inner"> <div class="header-article-items__start"> <div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip"> </button></div> </div> <div class="header-article-items__end"> <div class="header-article-item"> <div class="article-header-buttons"> <a href="https://github.com/google/flax" target="_blank" class="btn btn-sm btn-source-repository-button" title="Source repository" data-bs-placement="bottom" data-bs-toggle="tooltip" > </a> <div class="dropdown dropdown-download-buttons"> <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page"> </button> <ul class="dropdown-menu"> <li><a href="../../_sources/guides/converting_and_upgrading/convert_pytorch_to_flax.rst" target="_blank" class="btn btn-sm btn-download-source-button dropdown-item" title="Download source file" data-bs-placement="left" data-bs-toggle="tooltip" > .rst </a> </li> <li> <button onclick="window.print()" class="btn btn-sm btn-download-pdf-button dropdown-item" title="Print to PDF" data-bs-placement="left" data-bs-toggle="tooltip" > .pdf </button> </li> </ul> </div> <button onclick="toggleFullScreen()" class="btn btn-sm btn-fullscreen-button" title="Fullscreen mode" data-bs-placement="bottom" data-bs-toggle="tooltip" > </button> <script> document.write(` <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> </button> `); </script> <script> document.write(` <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> </button> `); </script> <button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip"> </button> </div></div> </div> </div> </div> <div id="jb-print-docs-body" class="onlyprint"> <h1>Convert PyTorch models to Flax</h1>  <div id="print-main-content"> <div id="jb-print-toc"> <div> <h2> Contents </h2> </div> <nav aria-label="Page"> <ul class="visible nav section-nav flex-column"> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fc-layers">FC Layers</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutions">Convolutions</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutions-and-fc-layers">Convolutions and FC Layers</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#batch-norm">Batch Norm</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#average-pooling">Average Pooling</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#transposed-convolutions">Transposed Convolutions</a></li> </ul> </nav> </div> </div> </div> <div id="searchbox"></div> <article class="bd-article"> <div class="section" id="convert-pytorch-models-to-flax"> <h1>Convert PyTorch models to Flax<a class="headerlink" href="#convert-pytorch-models-to-flax" title="Permalink to this heading">#</a></h1> We will show how to convert PyTorch models to Flax. We will cover convolutions, fc layers, batch norm, and average pooling. <div class="section" id="fc-layers"> <h2>FC Layers<a class="headerlink" href="#fc-layers" title="Permalink to this heading">#</a></h2> Let’s start with fc layers. The only thing to be aware of here is that the PyTorch kernel has shape [outC, inC] and the Flax kernel has shape [inC, outC]. Transposing the kernel will do the trick. <div class="highlight-python notranslate"><div class="highlight"><pre>t_fc = torch.nn.Linear(in_features=3, out_features=4) kernel = t_fc.weight.detach().cpu().numpy() bias = t_fc.bias.detach().cpu().numpy() # [outC, inC] -> [inC, outC] kernel = jnp.transpose(kernel, (1, 0)) key = random.key(0) x = random.normal(key, (1, 3)) variables = {'params': {'kernel': kernel, 'bias': bias}} j_fc = nn.Dense(features=4) j_out = j_fc.apply(variables, x) t_x = torch.from_numpy(np.array(x)) t_out = t_fc(t_x) t_out = t_out.detach().cpu().numpy() np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> <div class="section" id="convolutions"> <h2>Convolutions<a class="headerlink" href="#convolutions" title="Permalink to this heading">#</a></h2> Let’s now look at 2D convolutions. PyTorch uses the NCHW format and Flax uses NHWC. Consequently, the kernels will have different shapes. The kernel in PyTorch has shape [outC, inC, kH, kW] and the Flax kernel has shape [kH, kW, inC, outC]. Transposing the kernel will do the trick. <div class="highlight-python notranslate"><div class="highlight"><pre>t_conv = torch.nn.Conv2d(in_channels=3, out_channels=4, kernel_size=2, padding='valid') kernel = t_conv.weight.detach().cpu().numpy() bias = t_conv.bias.detach().cpu().numpy() # [outC, inC, kH, kW] -> [kH, kW, inC, outC] kernel = jnp.transpose(kernel, (2, 3, 1, 0)) key = random.key(0) x = random.normal(key, (1, 6, 6, 3)) variables = {'params': {'kernel': kernel, 'bias': bias}} j_conv = nn.Conv(features=4, kernel_size=(2, 2), padding='valid') j_out = j_conv.apply(variables, x) # [N, H, W, C] -> [N, C, H, W] t_x = torch.from_numpy(np.transpose(np.array(x), (0, 3, 1, 2))) t_out = t_conv(t_x) # [N, C, H, W] -> [N, H, W, C] t_out = np.transpose(t_out.detach().cpu().numpy(), (0, 2, 3, 1)) np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> <div class="section" id="convolutions-and-fc-layers"> <h2>Convolutions and FC Layers<a class="headerlink" href="#convolutions-and-fc-layers" title="Permalink to this heading">#</a></h2> We have to be careful, when we have a model that uses convolutions followed by fc layers (ResNet, VGG, etc). In PyTorch, the activations will have shape [N, C, H, W] after the convolutions and are then reshaped to [N, C * H * W] before being fed to the fc layers. When we port our weights from PyTorch to Flax, the activations after the convolutions will be of shape [N, H, W, C] in Flax. Before we reshape the activations for the fc layers, we have to transpose them to [N, C, H, W]. Consider this PyTorch model: <div class="highlight-python notranslate"><div class="highlight"><pre>class TModel(torch.nn.Module): def __init__(self): super(TModel, self).__init__() self.conv = torch.nn.Conv2d(in_channels=3, out_channels=4, kernel_size=2, padding='valid') self.fc = torch.nn.Linear(in_features=100, out_features=2) def forward(self, x): x = self.conv(x) x = x.reshape(x.shape[0], -1) x = self.fc(x) return x t_model = TModel() </pre></div> </div> Now, if you want to use the weights from this model in Flax, the corresponding Flax model has to look like this: <div class="highlight-python notranslate"><div class="highlight"><pre>class JModel(nn.Module): @nn.compact def __call__(self, x): x = nn.Conv(features=4, kernel_size=(2, 2), padding='valid', name='conv')(x) # [N, H, W, C] -> [N, C, H, W] x = jnp.transpose(x, (0, 3, 1, 2)) x = jnp.reshape(x, (x.shape[0], -1)) x = nn.Dense(features=2, name='fc')(x) return x j_model = JModel() </pre></div> </div> The model looks very similar to the PyTorch model, except that we included a transpose operation before reshaping our activations for the fc layer. We can omit the transpose operation if we apply pooling before reshaping such that the spatial dimensions are 1x1. Other than the transpose operation before reshaping, we can convert the weights the same way as we did before: <div class="highlight-python notranslate"><div class="highlight"><pre>conv_kernel = t_model.state_dict()['conv.weight'].detach().cpu().numpy() conv_bias = t_model.state_dict()['conv.bias'].detach().cpu().numpy() fc_kernel = t_model.state_dict()['fc.weight'].detach().cpu().numpy() fc_bias = t_model.state_dict()['fc.bias'].detach().cpu().numpy() # [outC, inC, kH, kW] -> [kH, kW, inC, outC] conv_kernel = jnp.transpose(conv_kernel, (2, 3, 1, 0)) # [outC, inC] -> [inC, outC] fc_kernel = jnp.transpose(fc_kernel, (1, 0)) variables = {'params': {'conv': {'kernel': conv_kernel, 'bias': conv_bias}, 'fc': {'kernel': fc_kernel, 'bias': fc_bias}}} key = random.key(0) x = random.normal(key, (1, 6, 6, 3)) j_out = j_model.apply(variables, x) # [N, H, W, C] -> [N, C, H, W] t_x = torch.from_numpy(np.transpose(np.array(x), (0, 3, 1, 2))) t_out = t_model(t_x) t_out = t_out.detach().cpu().numpy() np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> <div class="section" id="batch-norm"> <h2>Batch Norm<a class="headerlink" href="#batch-norm" title="Permalink to this heading">#</a></h2> <code class="docutils literal notranslate">torch.nn.BatchNorm2d</code> uses <code class="docutils literal notranslate">0.1</code> as the default value for the <code class="docutils literal notranslate">momentum</code> parameter while <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.BatchNorm"><code class="docutils literal notranslate">nn.BatchNorm</code></a> uses <code class="docutils literal notranslate">0.9</code>. However, this corresponds to the same computation, because PyTorch multiplies the estimated statistic with <code class="docutils literal notranslate">(1 − momentum)</code> and the new observed value with <code class="docutils literal notranslate">momentum</code>, while Flax multiplies the estimated statistic with <code class="docutils literal notranslate">momentum</code> and the new observed value with <code class="docutils literal notranslate">(1 − momentum)</code>. <div class="highlight-python notranslate"><div class="highlight"><pre>t_bn = torch.nn.BatchNorm2d(num_features=3, momentum=0.1) t_bn.eval() scale = t_bn.weight.detach().cpu().numpy() bias = t_bn.bias.detach().cpu().numpy() mean = t_bn.running_mean.detach().cpu().numpy() var = t_bn.running_var.detach().cpu().numpy() variables = {'params': {'scale': scale, 'bias': bias}, 'batch_stats': {'mean': mean, 'var': var}} key = random.key(0) x = random.normal(key, (1, 6, 6, 3)) j_bn = nn.BatchNorm(momentum=0.9, use_running_average=True) j_out = j_bn.apply(variables, x) # [N, H, W, C] -> [N, C, H, W] t_x = torch.from_numpy(np.transpose(np.array(x), (0, 3, 1, 2))) t_out = t_bn(t_x) # [N, C, H, W] -> [N, H, W, C] t_out = np.transpose(t_out.detach().cpu().numpy(), (0, 2, 3, 1)) np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> <div class="section" id="average-pooling"> <h2>Average Pooling<a class="headerlink" href="#average-pooling" title="Permalink to this heading">#</a></h2> <code class="docutils literal notranslate">torch.nn.AvgPool2d</code> and <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.avg_pool"><code class="docutils literal notranslate">nn.avg_pool()</code></a> are compatible when using default parameters. However, <code class="docutils literal notranslate">torch.nn.AvgPool2d</code> has a parameter <code class="docutils literal notranslate">count_include_pad</code>. When <code class="docutils literal notranslate">count_include_pad=False</code>, the zero-padding will not be considered for the average calculation. There does not exist a similar parameter for <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.avg_pool"><code class="docutils literal notranslate">nn.avg_pool()</code></a>. However, we can easily implement a wrapper around the pooling operation. <code class="docutils literal notranslate">nn.pool()</code> is the core function behind <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.avg_pool"><code class="docutils literal notranslate">nn.avg_pool()</code></a> and <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.max_pool"><code class="docutils literal notranslate">nn.max_pool()</code></a>. <div class="highlight-python notranslate"><div class="highlight"><pre>def avg_pool(inputs, window_shape, strides=None, padding='VALID'): """ Pools the input by taking the average over a window. In comparison to nn.avg_pool(), this pooling operation does not consider the padded zero's for the average computation. """ assert len(window_shape) == 2 y = nn.pool(inputs, 0., jax.lax.add, window_shape, strides, padding) counts = nn.pool(jnp.ones_like(inputs), 0., jax.lax.add, window_shape, strides, padding) y = y / counts return y key = random.key(0) x = random.normal(key, (1, 6, 6, 3)) j_out = avg_pool(x, window_shape=(2, 2), strides=(1, 1), padding=((1, 1), (1, 1))) t_pool = torch.nn.AvgPool2d(kernel_size=2, stride=1, padding=1, count_include_pad=False) # [N, H, W, C] -> [N, C, H, W] t_x = torch.from_numpy(np.transpose(np.array(x), (0, 3, 1, 2))) t_out = t_pool(t_x) # [N, C, H, W] -> [N, H, W, C] t_out = np.transpose(t_out.detach().cpu().numpy(), (0, 2, 3, 1)) np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> <div class="section" id="transposed-convolutions"> <h2>Transposed Convolutions<a class="headerlink" href="#transposed-convolutions" title="Permalink to this heading">#</a></h2> <code class="docutils literal notranslate">torch.nn.ConvTranspose2d</code> and <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.ConvTranspose"><code class="docutils literal notranslate">nn.ConvTranspose</code></a> are not compatible. <a class="reference external" href="https://flax.readthedocs.io/en/latest/api_reference/flax.linen/layers.html#flax.linen.ConvTranspose"><code class="docutils literal notranslate">nn.ConvTranspose</code></a> is a wrapper around <a class="reference external" href="https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.conv_transpose.html"><code class="docutils literal notranslate">jax.lax.conv_transpose</code></a> which computes a fractionally strided convolution, while <code class="docutils literal notranslate">torch.nn.ConvTranspose2d</code> computes a gradient based transposed convolution. Currently, there is no implementation of a gradient based transposed convolution is <code class="docutils literal notranslate">Jax</code>. However, there is a pending <a class="reference external" href="https://github.com/jax-ml/jax/pull/5772">pull request</a> that contains an implementation. To load <code class="docutils literal notranslate">torch.nn.ConvTranspose2d</code> parameters into Flax, we need to use the <code class="docutils literal notranslate">transpose_kernel</code> arg in Flax’s <code class="docutils literal notranslate">nn.ConvTranspose</code> layer. <div class="highlight-python notranslate"><div class="highlight"><pre># padding is inverted torch_padding = 0 flax_padding = 1 - torch_padding t_conv = torch.nn.ConvTranspose2d(in_channels=3, out_channels=4, kernel_size=2, padding=torch_padding) kernel = t_conv.weight.detach().cpu().numpy() bias = t_conv.bias.detach().cpu().numpy() # [inC, outC, kH, kW] -> [kH, kW, outC, inC] kernel = jnp.transpose(kernel, (2, 3, 1, 0)) key = random.key(0) x = random.normal(key, (1, 6, 6, 3)) variables = {'params': {'kernel': kernel, 'bias': bias}} # ConvTranspose expects the kernel to be [kH, kW, inC, outC], # but with `transpose_kernel=True`, it expects [kH, kW, outC, inC] instead j_conv = nn.ConvTranspose(features=4, kernel_size=(2, 2), padding=flax_padding, transpose_kernel=True) j_out = j_conv.apply(variables, x) # [N, H, W, C] -> [N, C, H, W] t_x = torch.from_numpy(np.transpose(np.array(x), (0, 3, 1, 2))) t_out = t_conv(t_x) # [N, C, H, W] -> [N, H, W, C] t_out = np.transpose(t_out.detach().cpu().numpy(), (0, 2, 3, 1)) np.testing.assert_almost_equal(j_out, t_out, decimal=6) </pre></div> </div> </div> </div> </article> <footer class="prev-next-footer d-print-none"> <div class="prev-next-area"> <a class="left-prev" href="haiku_migration_guide.html" title="previous page"> <div class="prev-next-info"> previous Migrating from Haiku to Flax </div> </a> <a class="right-next" href="orbax_upgrade_guide.html" title="next page"> <div class="prev-next-info"> next Migrate checkpointing to Orbax </div> </a> </div> </footer> </div> <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> <div class="sidebar-secondary-item"> <div class="page-toc tocsection onthispage"> Contents </div> <nav class="bd-toc-nav page-toc"> <ul class="visible nav section-nav flex-column"> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fc-layers">FC Layers</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutions">Convolutions</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutions-and-fc-layers">Convolutions and FC Layers</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#batch-norm">Batch Norm</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#average-pooling">Average Pooling</a></li> <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#transposed-convolutions">Transposed Convolutions</a></li> </ul> </nav></div> </div></div> </div> <footer class="bd-footer-content"> <div class="bd-footer-content__inner container"> <div class="footer-item"> By The Flax authors </div> <div class="footer-item"> © Copyright 2023, The Flax authors. </div> <div class="footer-item"> </div> <div class="footer-item"> </div> </div> </footer> </main> </div> </div>  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script> <script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script> <footer class="bd-footer"> </footer> </body> </html>