CINXE.COM

Grokking Diffusion Models – Non_Interactive – Software & ML

<!doctype html> <html lang="en-US"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="profile" href="http://gmpg.org/xfn/11"> <title>Grokking Diffusion Models &#8211; Non_Interactive &#8211; Software &amp; ML</title> <meta name='robots' content='max-image-preview:large' /> <link rel="amphtml" href="https://nonint.com/2022/10/31/grokking-diffusion-models/amp/" /><meta name="generator" content="AMP for WP 1.0.77.35"/><link rel='dns-prefetch' href='//fonts.googleapis.com' /> <link rel='dns-prefetch' href='//s.w.org' /> <link rel="alternate" type="application/rss+xml" title="Non_Interactive - Software &amp; ML &raquo; Feed" href="https://nonint.com/feed/" /> <link rel="alternate" type="application/rss+xml" title="Non_Interactive - Software &amp; ML &raquo; Comments Feed" href="https://nonint.com/comments/feed/" /> <!-- This site uses the Google Analytics by ExactMetrics plugin v7.10.0 - Using Analytics tracking - https://www.exactmetrics.com/ --> <script src="//www.googletagmanager.com/gtag/js?id=UA-100350843-3" data-cfasync="false" data-wpfc-render="false" type="text/javascript" async></script> <script data-cfasync="false" data-wpfc-render="false" type="text/javascript"> var em_version = '7.10.0'; var em_track_user = true; var em_no_track_reason = ''; var disableStrs = [ 'ga-disable-UA-100350843-3', ]; /* Function to detect opted out users */ function __gtagTrackerIsOptedOut() { for (var index = 0; index < disableStrs.length; index++) { if (document.cookie.indexOf(disableStrs[index] + '=true') > -1) { return true; } } return false; } /* Disable tracking if the opt-out cookie exists. 
*/ if (__gtagTrackerIsOptedOut()) { for (var index = 0; index < disableStrs.length; index++) { window[disableStrs[index]] = true; } } /* Opt-out function */ function __gtagTrackerOptout() { for (var index = 0; index < disableStrs.length; index++) { document.cookie = disableStrs[index] + '=true; expires=Thu, 31 Dec 2099 23:59:59 UTC; path=/'; window[disableStrs[index]] = true; } } if ('undefined' === typeof gaOptout) { function gaOptout() { __gtagTrackerOptout(); } } window.dataLayer = window.dataLayer || []; window.ExactMetricsDualTracker = { helpers: {}, trackers: {}, }; if (em_track_user) { function __gtagDataLayer() { dataLayer.push(arguments); } function __gtagTracker(type, name, parameters) { if (!parameters) { parameters = {}; } if (parameters.send_to) { __gtagDataLayer.apply(null, arguments); return; } if (type === 'event') { parameters.send_to = exactmetrics_frontend.ua; __gtagDataLayer(type, name, parameters); } else { __gtagDataLayer.apply(null, arguments); } } __gtagTracker('js', new Date()); __gtagTracker('set', { 'developer_id.dNDMyYj': true, }); __gtagTracker('config', 'UA-100350843-3', {"forceSSL":"true"} ); window.gtag = __gtagTracker; (function () { /* https://developers.google.com/analytics/devguides/collection/analyticsjs/ */ /* ga and __gaTracker compatibility shim. 
*/ var noopfn = function () { return null; }; var newtracker = function () { return new Tracker(); }; var Tracker = function () { return null; }; var p = Tracker.prototype; p.get = noopfn; p.set = noopfn; p.send = function () { var args = Array.prototype.slice.call(arguments); args.unshift('send'); __gaTracker.apply(null, args); }; var __gaTracker = function () { var len = arguments.length; if (len === 0) { return; } var f = arguments[len - 1]; if (typeof f !== 'object' || f === null || typeof f.hitCallback !== 'function') { if ('send' === arguments[0]) { var hitConverted, hitObject = false, action; if ('event' === arguments[1]) { if ('undefined' !== typeof arguments[3]) { hitObject = { 'eventAction': arguments[3], 'eventCategory': arguments[2], 'eventLabel': arguments[4], 'value': arguments[5] ? arguments[5] : 1, } } } if ('pageview' === arguments[1]) { if ('undefined' !== typeof arguments[2]) { hitObject = { 'eventAction': 'page_view', 'page_path': arguments[2], } } } if (typeof arguments[2] === 'object') { hitObject = arguments[2]; } if (typeof arguments[5] === 'object') { Object.assign(hitObject, arguments[5]); } if ('undefined' !== typeof arguments[1].hitType) { hitObject = arguments[1]; if ('pageview' === hitObject.hitType) { hitObject.eventAction = 'page_view'; } } if (hitObject) { action = 'timing' === arguments[1].hitType ? 
'timing_complete' : hitObject.eventAction; hitConverted = mapArgs(hitObject); __gtagTracker('event', action, hitConverted); } } return; } function mapArgs(args) { var arg, hit = {}; var gaMap = { 'eventCategory': 'event_category', 'eventAction': 'event_action', 'eventLabel': 'event_label', 'eventValue': 'event_value', 'nonInteraction': 'non_interaction', 'timingCategory': 'event_category', 'timingVar': 'name', 'timingValue': 'value', 'timingLabel': 'event_label', 'page': 'page_path', 'location': 'page_location', 'title': 'page_title', }; for (arg in args) { if (!(!args.hasOwnProperty(arg) || !gaMap.hasOwnProperty(arg))) { hit[gaMap[arg]] = args[arg]; } else { hit[arg] = args[arg]; } } return hit; } try { f.hitCallback(); } catch (ex) { } }; __gaTracker.create = newtracker; __gaTracker.getByName = newtracker; __gaTracker.getAll = function () { return []; }; __gaTracker.remove = noopfn; __gaTracker.loaded = true; window['__gaTracker'] = __gaTracker; })(); } else { console.log(""); (function () { function __gtagTracker() { return null; } window['__gtagTracker'] = __gtagTracker; window['gtag'] = __gtagTracker; })(); } </script> <!-- / Google Analytics by ExactMetrics --> <script type="text/javascript"> window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/13.1.0\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/13.1.0\/svg\/","svgExt":".svg","source":{"concatemoji":"https:\/\/nonint.com\/wp-includes\/js\/wp-emoji-release.min.js?ver=5.9.10"}}; /*! 
This file is auto-generated */ !function(e,a,t){var n,r,o,i=a.createElement("canvas"),p=i.getContext&&i.getContext("2d");function s(e,t){var a=String.fromCharCode;p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,e),0,0);e=i.toDataURL();return p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,t),0,0),e===i.toDataURL()}function c(e){var t=a.createElement("script");t.src=e,t.defer=t.type="text/javascript",a.getElementsByTagName("head")[0].appendChild(t)}for(o=Array("flag","emoji"),t.supports={everything:!0,everythingExceptFlag:!0},r=0;r<o.length;r++)t.supports[o[r]]=function(e){if(!p||!p.fillText)return!1;switch(p.textBaseline="top",p.font="600 32px Arial",e){case"flag":return s([127987,65039,8205,9895,65039],[127987,65039,8203,9895,65039])?!1:!s([55356,56826,55356,56819],[55356,56826,8203,55356,56819])&&!s([55356,57332,56128,56423,56128,56418,56128,56421,56128,56430,56128,56423,56128,56447],[55356,57332,8203,56128,56423,8203,56128,56418,8203,56128,56421,8203,56128,56430,8203,56128,56423,8203,56128,56447]);case"emoji":return!s([10084,65039,8205,55357,56613],[10084,65039,8203,55357,56613])}return!1}(o[r]),t.supports.everything=t.supports.everything&&t.supports[o[r]],"flag"!==o[r]&&(t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&t.supports[o[r]]);t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&!t.supports.flag,t.DOMReady=!1,t.readyCallback=function(){t.DOMReady=!0},t.supports.everything||(n=function(){t.readyCallback()},a.addEventListener?(a.addEventListener("DOMContentLoaded",n,!1),e.addEventListener("load",n,!1)):(e.attachEvent("onload",n),a.attachEvent("onreadystatechange",function(){"complete"===a.readyState&&t.readyCallback()})),(n=t.source||{}).concatemoji?c(n.concatemoji):n.wpemoji&&n.twemoji&&(c(n.twemoji),c(n.wpemoji)))}(window,document,window._wpemojiSettings); </script> <style type="text/css"> img.wp-smiley, img.emoji { display: inline !important; border: none !important; box-shadow: none !important; height: 
1em !important; width: 1em !important; margin: 0 0.07em !important; vertical-align: -0.1em !important; background: none !important; padding: 0 !important; } </style> <link rel='stylesheet' id='wp-block-library-css' href='https://nonint.com/wp-includes/css/dist/block-library/style.min.css?ver=5.9.10' type='text/css' media='all' /> <style id='global-styles-inline-css' type='text/css'> body{--wp--preset--color--black: #000000;--wp--preset--color--cyan-bluish-gray: #abb8c3;--wp--preset--color--white: #ffffff;--wp--preset--color--pale-pink: #f78da7;--wp--preset--color--vivid-red: #cf2e2e;--wp--preset--color--luminous-vivid-orange: #ff6900;--wp--preset--color--luminous-vivid-amber: #fcb900;--wp--preset--color--light-green-cyan: #7bdcb5;--wp--preset--color--vivid-green-cyan: #00d084;--wp--preset--color--pale-cyan-blue: #8ed1fc;--wp--preset--color--vivid-cyan-blue: #0693e3;--wp--preset--color--vivid-purple: #9b51e0;--wp--preset--gradient--vivid-cyan-blue-to-vivid-purple: linear-gradient(135deg,rgba(6,147,227,1) 0%,rgb(155,81,224) 100%);--wp--preset--gradient--light-green-cyan-to-vivid-green-cyan: linear-gradient(135deg,rgb(122,220,180) 0%,rgb(0,208,130) 100%);--wp--preset--gradient--luminous-vivid-amber-to-luminous-vivid-orange: linear-gradient(135deg,rgba(252,185,0,1) 0%,rgba(255,105,0,1) 100%);--wp--preset--gradient--luminous-vivid-orange-to-vivid-red: linear-gradient(135deg,rgba(255,105,0,1) 0%,rgb(207,46,46) 100%);--wp--preset--gradient--very-light-gray-to-cyan-bluish-gray: linear-gradient(135deg,rgb(238,238,238) 0%,rgb(169,184,195) 100%);--wp--preset--gradient--cool-to-warm-spectrum: linear-gradient(135deg,rgb(74,234,220) 0%,rgb(151,120,209) 20%,rgb(207,42,186) 40%,rgb(238,44,130) 60%,rgb(251,105,98) 80%,rgb(254,248,76) 100%);--wp--preset--gradient--blush-light-purple: linear-gradient(135deg,rgb(255,206,236) 0%,rgb(152,150,240) 100%);--wp--preset--gradient--blush-bordeaux: linear-gradient(135deg,rgb(254,205,165) 0%,rgb(254,45,45) 50%,rgb(107,0,62) 
100%);--wp--preset--gradient--luminous-dusk: linear-gradient(135deg,rgb(255,203,112) 0%,rgb(199,81,192) 50%,rgb(65,88,208) 100%);--wp--preset--gradient--pale-ocean: linear-gradient(135deg,rgb(255,245,203) 0%,rgb(182,227,212) 50%,rgb(51,167,181) 100%);--wp--preset--gradient--electric-grass: linear-gradient(135deg,rgb(202,248,128) 0%,rgb(113,206,126) 100%);--wp--preset--gradient--midnight: linear-gradient(135deg,rgb(2,3,129) 0%,rgb(40,116,252) 100%);--wp--preset--duotone--dark-grayscale: url('#wp-duotone-dark-grayscale');--wp--preset--duotone--grayscale: url('#wp-duotone-grayscale');--wp--preset--duotone--purple-yellow: url('#wp-duotone-purple-yellow');--wp--preset--duotone--blue-red: url('#wp-duotone-blue-red');--wp--preset--duotone--midnight: url('#wp-duotone-midnight');--wp--preset--duotone--magenta-yellow: url('#wp-duotone-magenta-yellow');--wp--preset--duotone--purple-green: url('#wp-duotone-purple-green');--wp--preset--duotone--blue-orange: url('#wp-duotone-blue-orange');--wp--preset--font-size--small: 13px;--wp--preset--font-size--medium: 20px;--wp--preset--font-size--large: 36px;--wp--preset--font-size--x-large: 42px;}.has-black-color{color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-color{color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-color{color: var(--wp--preset--color--white) !important;}.has-pale-pink-color{color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-color{color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-color{color: var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-color{color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-color{color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-color{color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-color{color: var(--wp--preset--color--pale-cyan-blue) 
!important;}.has-vivid-cyan-blue-color{color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-color{color: var(--wp--preset--color--vivid-purple) !important;}.has-black-background-color{background-color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-background-color{background-color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-background-color{background-color: var(--wp--preset--color--white) !important;}.has-pale-pink-background-color{background-color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-background-color{background-color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-background-color{background-color: var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-background-color{background-color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-background-color{background-color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-background-color{background-color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-background-color{background-color: var(--wp--preset--color--pale-cyan-blue) !important;}.has-vivid-cyan-blue-background-color{background-color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-background-color{background-color: var(--wp--preset--color--vivid-purple) !important;}.has-black-border-color{border-color: var(--wp--preset--color--black) !important;}.has-cyan-bluish-gray-border-color{border-color: var(--wp--preset--color--cyan-bluish-gray) !important;}.has-white-border-color{border-color: var(--wp--preset--color--white) !important;}.has-pale-pink-border-color{border-color: var(--wp--preset--color--pale-pink) !important;}.has-vivid-red-border-color{border-color: var(--wp--preset--color--vivid-red) !important;}.has-luminous-vivid-orange-border-color{border-color: 
var(--wp--preset--color--luminous-vivid-orange) !important;}.has-luminous-vivid-amber-border-color{border-color: var(--wp--preset--color--luminous-vivid-amber) !important;}.has-light-green-cyan-border-color{border-color: var(--wp--preset--color--light-green-cyan) !important;}.has-vivid-green-cyan-border-color{border-color: var(--wp--preset--color--vivid-green-cyan) !important;}.has-pale-cyan-blue-border-color{border-color: var(--wp--preset--color--pale-cyan-blue) !important;}.has-vivid-cyan-blue-border-color{border-color: var(--wp--preset--color--vivid-cyan-blue) !important;}.has-vivid-purple-border-color{border-color: var(--wp--preset--color--vivid-purple) !important;}.has-vivid-cyan-blue-to-vivid-purple-gradient-background{background: var(--wp--preset--gradient--vivid-cyan-blue-to-vivid-purple) !important;}.has-light-green-cyan-to-vivid-green-cyan-gradient-background{background: var(--wp--preset--gradient--light-green-cyan-to-vivid-green-cyan) !important;}.has-luminous-vivid-amber-to-luminous-vivid-orange-gradient-background{background: var(--wp--preset--gradient--luminous-vivid-amber-to-luminous-vivid-orange) !important;}.has-luminous-vivid-orange-to-vivid-red-gradient-background{background: var(--wp--preset--gradient--luminous-vivid-orange-to-vivid-red) !important;}.has-very-light-gray-to-cyan-bluish-gray-gradient-background{background: var(--wp--preset--gradient--very-light-gray-to-cyan-bluish-gray) !important;}.has-cool-to-warm-spectrum-gradient-background{background: var(--wp--preset--gradient--cool-to-warm-spectrum) !important;}.has-blush-light-purple-gradient-background{background: var(--wp--preset--gradient--blush-light-purple) !important;}.has-blush-bordeaux-gradient-background{background: var(--wp--preset--gradient--blush-bordeaux) !important;}.has-luminous-dusk-gradient-background{background: var(--wp--preset--gradient--luminous-dusk) !important;}.has-pale-ocean-gradient-background{background: var(--wp--preset--gradient--pale-ocean) 
!important;}.has-electric-grass-gradient-background{background: var(--wp--preset--gradient--electric-grass) !important;}.has-midnight-gradient-background{background: var(--wp--preset--gradient--midnight) !important;}.has-small-font-size{font-size: var(--wp--preset--font-size--small) !important;}.has-medium-font-size{font-size: var(--wp--preset--font-size--medium) !important;}.has-large-font-size{font-size: var(--wp--preset--font-size--large) !important;}.has-x-large-font-size{font-size: var(--wp--preset--font-size--x-large) !important;} </style> <link rel='stylesheet' id='parent-style-css' href='https://nonint.com/wp-content/themes/minimalistblogger/style.css?ver=5.9.10' type='text/css' media='all' /> <link rel='stylesheet' id='dark-minimalistblogger-google-fonts-css' href='//fonts.googleapis.com/css?family=Roboto%3A400%2C700%2C900%7CZen+Antique&#038;ver=5.9.10' type='text/css' media='all' /> <link rel='stylesheet' id='font-awesome-css' href='https://nonint.com/wp-content/themes/minimalistblogger/css/font-awesome.min.css?ver=5.9.10' type='text/css' media='all' /> <link rel='stylesheet' id='minimalistblogger-style-css' href='https://nonint.com/wp-content/themes/dark-minimalistblogger/style.css?ver=5.9.10' type='text/css' media='all' /> <link rel='stylesheet' id='minimalistblogger-google-fonts-css' href='//fonts.googleapis.com/css?family=Lato%3A300%2C400%2C700%2C900%7CMerriweather%3A400%2C700&#038;ver=5.9.10' type='text/css' media='all' /> <script type='text/javascript' src='https://nonint.com/wp-content/plugins/google-analytics-dashboard-for-wp/assets/js/frontend-gtag.min.js?ver=7.10.0' id='exactmetrics-frontend-script-js'></script> <script data-cfasync="false" data-wpfc-render="false" type="text/javascript" id='exactmetrics-frontend-script-js-extra'>/* <![CDATA[ */ var exactmetrics_frontend = 
{"js_events_tracking":"true","download_extensions":"zip,mp3,mpeg,pdf,docx,pptx,xlsx,rar","inbound_paths":"[{\"path\":\"\\\/go\\\/\",\"label\":\"affiliate\"},{\"path\":\"\\\/recommend\\\/\",\"label\":\"affiliate\"}]","home_url":"https:\/\/nonint.com","hash_tracking":"false","ua":"UA-100350843-3","v4_id":""};/* ]]> */ </script> <script type='text/javascript' src='https://nonint.com/wp-includes/js/jquery/jquery.min.js?ver=3.6.0' id='jquery-core-js'></script> <script type='text/javascript' src='https://nonint.com/wp-includes/js/jquery/jquery-migrate.min.js?ver=3.3.2' id='jquery-migrate-js'></script> <link rel="https://api.w.org/" href="https://nonint.com/wp-json/" /><link rel="alternate" type="application/json" href="https://nonint.com/wp-json/wp/v2/posts/270" /><link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://nonint.com/xmlrpc.php?rsd" /> <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="https://nonint.com/wp-includes/wlwmanifest.xml" /> <meta name="generator" content="WordPress 5.9.10" /> <link rel="canonical" href="https://nonint.com/2022/10/31/grokking-diffusion-models/" /> <link rel='shortlink' href='https://nonint.com/?p=270' /> <link rel="alternate" type="application/json+oembed" href="https://nonint.com/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fnonint.com%2F2022%2F10%2F31%2Fgrokking-diffusion-models%2F" /> <script type="text/javascript"> (function(url){ if(/(?:Chrome\/26\.0\.1410\.63 Safari\/537\.31|WordfenceTestMonBot)/.test(navigator.userAgent)){ return; } var addEvent = function(evt, handler) { if (window.addEventListener) { document.addEventListener(evt, handler, false); } else if (window.attachEvent) { document.attachEvent('on' + evt, handler); } }; var removeEvent = function(evt, handler) { if (window.removeEventListener) { document.removeEventListener(evt, handler, false); } else if (window.detachEvent) { document.detachEvent('on' + evt, handler); } }; var evts = 'contextmenu dblclick drag dragend dragenter 
dragleave dragover dragstart drop keydown keypress keyup mousedown mousemove mouseout mouseover mouseup mousewheel scroll'.split(' '); var logHuman = function() { if (window.wfLogHumanRan) { return; } window.wfLogHumanRan = true; var wfscr = document.createElement('script'); wfscr.type = 'text/javascript'; wfscr.async = true; wfscr.src = url + '&r=' + Math.random(); (document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(wfscr); for (var i = 0; i < evts.length; i++) { removeEvent(evts[i], logHuman); } }; for (var i = 0; i < evts.length; i++) { addEvent(evts[i], logHuman); } })('//nonint.com/?wordfence_lh=1&hid=FF7AEB4004AE4E75F99804432ED50A05'); </script> <style type="text/css"> .blogpost-button, .wp-block-search .wp-block-search__button, .comments-area p.form-submit input, .post-data-divider { background: ; } .super-menu, #smobile-menu, .primary-menu, .primary-menu ul li ul.children, .primary-menu ul li ul.sub-menu { background-color: ; } .main-navigation ul li a, .main-navigation ul li .sub-arrow, .super-menu .toggle-mobile-menu,.toggle-mobile-menu:before, .mobile-menu-active .smenu-hide { color: ; } #smobile-menu.show .main-navigation ul ul.children.active, #smobile-menu.show .main-navigation ul ul.sub-menu.active, #smobile-menu.show .main-navigation ul li, .smenu-hide.toggle-mobile-menu.menu-toggle, #smobile-menu.show .main-navigation ul li, .primary-menu ul li ul.children li, .primary-menu ul li ul.sub-menu li, .primary-menu .pmenu, .super-menu { border-color: ; border-bottom-color: ; } #secondary .widget h3, #secondary .widget h3 a, #secondary .widget h4, #secondary .widget h1, #secondary .widget h2, #secondary .widget h5, #secondary .widget h6 { color: ; } #secondary .widget a, #secondary a, #secondary .widget li a , #secondary span.sub-arrow{ color: ; } #secondary, #secondary .widget, #secondary .widget p, #secondary .widget li, .widget time.rpwe-time.published { color: ; } .swidgets-wrap .sidebar-title-border, 
#secondary .swidgets-wrap, #secondary .widget ul li, .featured-sidebar .search-field { border-color: ; } .site-info, .footer-column-three input.search-submit, .footer-column-three p, .footer-column-three li, .footer-column-three td, .footer-column-three th, .footer-column-three caption { color: ; } body, .site, .swidgets-wrap h3, .post-data-text { background: ; } .site-title a, .site-description { color: ; } .sheader { background: } </style> <style type="text/css"> body, .site, .swidgets-wrap h3, .post-data-text { background: ; } .site-title a, .site-description { color: ; } .sheader { background: } </style> <style type="text/css" id="wp-custom-css"> /* Posts Page */ #primary { width:100%; } /* Individual Post */ #primary-mono { width:100%; } pre.wp-block-code { border-left: 6px solid #3498DB; background-color: #000000; padding: 5px; } </style> </head> <body class="post-template-default single single-post postid-270 single-format-standard masthead-fixed"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-dark-grayscale"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0 0.49803921568627" /><feFuncG type="table" tableValues="0 0.49803921568627" /><feFuncB type="table" tableValues="0 0.49803921568627" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-grayscale"><feColorMatrix 
color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0 1" /><feFuncG type="table" tableValues="0 1" /><feFuncB type="table" tableValues="0 1" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-purple-yellow"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0.54901960784314 0.98823529411765" /><feFuncG type="table" tableValues="0 1" /><feFuncB type="table" tableValues="0.71764705882353 0.25490196078431" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-blue-red"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0 1" /><feFuncG type="table" tableValues="0 0.27843137254902" /><feFuncB type="table" tableValues="0.5921568627451 0.27843137254902" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-midnight"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0 0" /><feFuncG type="table" tableValues="0 0.64705882352941" /><feFuncB type="table" tableValues="0 1" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-magenta-yellow"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0.78039215686275 1" /><feFuncG type="table" tableValues="0 0.94901960784314" /><feFuncB type="table" tableValues="0.35294117647059 0.47058823529412" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-purple-green"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0.65098039215686 0.40392156862745" 
/><feFuncG type="table" tableValues="0 1" /><feFuncB type="table" tableValues="0.44705882352941 0.4" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 0 0" width="0" height="0" focusable="false" role="none" style="visibility: hidden; position: absolute; left: -9999px; overflow: hidden;" ><defs><filter id="wp-duotone-blue-orange"><feColorMatrix color-interpolation-filters="sRGB" type="matrix" values=" .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 .299 .587 .114 0 0 " /><feComponentTransfer color-interpolation-filters="sRGB" ><feFuncR type="table" tableValues="0.098039215686275 1" /><feFuncG type="table" tableValues="0 0.66274509803922" /><feFuncB type="table" tableValues="0.84705882352941 0.41960784313725" /><feFuncA type="table" tableValues="1 1" /></feComponentTransfer><feComposite in2="SourceGraphic" operator="in" /></filter></defs></svg><a class="skip-link screen-reader-text" href="#content">Skip to content</a> <div id="page" class="site"> <header id="masthead" class="sheader site-header clearfix"> <div class="content-wrap"> <div class="site-branding"> <!-- If frontpage, make title h1 --> <p class="site-title"><a href="https://nonint.com/" rel="home">Non_Interactive &#8211; Software &amp; ML</a></p> </div> </div> <nav id="primary-site-navigation" class="primary-menu main-navigation clearfix"> <a href="#" id="pull" class="smenu-hide toggle-mobile-menu menu-toggle" aria-controls="secondary-menu" aria-expanded="false">Menu</a> <div class="content-wrap text-center"> <div class="center-main-menu"> <div id="primary-menu" class="pmenu"><ul> <li class="page_item page-item-10"><a href="https://nonint.com/contact/">Contact</a></li> <li class="page_item page-item-136"><a href="https://nonint.com/">Non_Int</a></li> <li class="page_item page-item-9"><a href="https://nonint.com/about/">What is Non-Interactive?</a></li> 
</ul></div> </div> </div> </nav> <div class="super-menu clearfix"> <div class="super-menu-inner"> <a href="#" id="pull" class="toggle-mobile-menu menu-toggle" aria-controls="secondary-menu" aria-expanded="false">Menu</a> </div> </div> <div id="mobile-menu-overlay"></div> </header> <div class="content-wrap"> <div class="header-widgets-wrapper"> </div> </div> <div id="content" class="site-content clearfix"> <div class="content-wrap"> <div id="primary" class="featured-content content-area"> <main id="main" class="site-main"> <article id="post-270" class="posts-entry fbox post-270 post type-post status-publish format-standard hentry category-uncategorized"> <header class="entry-header"> <h1 class="entry-title">Grokking Diffusion Models</h1> <div class="entry-meta"> <div class="blog-data-wrapper"> <div class="post-data-divider"></div> <div class="post-data-positioning"> <div class="post-data-text"> <span class="posted-on">Posted on <a href="https://nonint.com/2022/10/31/grokking-diffusion-models/" rel="bookmark"><time class="entry-date published updated" datetime="2022-10-31T11:28:30-06:00">October 31, 2022</time></a></span><span class="byline"> by <span class="author vcard"><a class="url fn n" href="https://nonint.com/author/jbetker/">jbetker</a></span></span> </div> </div> </div> </div><!-- .entry-meta --> </header><!-- .entry-header --> <div class="entry-content"> <p>Since joining OpenAI, I&#8217;ve had the distinct pleasure of interacting with some of the smartest people on the planet on the subject of generative models. In these conversations, I am often struck by how many different ways there are to &#8220;understand&#8221; how diffusion works.</p> <p>I don&#8217;t think most folk&#8217;s understanding of this paradigm is &#8220;right&#8221; or &#8220;wrong&#8221;: they are just different. 
I think there is a distinct value in having different viewpoints here: an engineer&#8217;s perspective might be more useful to deploy these things to real products, whereas a mathematician&#8217;s conceptualization may aid improvements in the core technology.</p> <p>I&#8217;d like to jump through a few of these vantage points in this post, in the hope that I can help more people understand how this amazing technology works.</p> <h2>Compute &amp; Quality</h2> <p>One aspect of generative modeling that has become quite clear to me is that finding principled ways to throw compute at a problem is a surefire way to improve output quality.</p> <p>This is a roundabout way of defining the concept of &#8220;scaling&#8221; in ML. However, when most people think of &#8220;scaling&#8221;, they think of making bigger models. This isn&#8217;t the only game in town, though: you can also scale compute by making a smaller model spend more time iterating on an output. This is exactly what diffusion models do!</p> <p>Here&#8217;s how that works: take a small(ish) model that is capable of doing something extremely simple. For example: remove a microscopic amount of noise from an image. Starting with pure gaussian noise, iteratively apply that model to a series of &#8220;images&#8221;. Each step, remove the noise predicted by the model. Bam! Lots of compute spent on a single image, and the model can be &#8220;pretty small&#8221;.</p> <figure class="wp-block-image size-full"><img width="785" height="73" src="https://nonint.com/wp-content/uploads/2022/10/image.png" alt="" class="wp-image-272"/><figcaption>The diffusion process, credit to <a href="https://arxiv.org/pdf/2102.09672.pdf" data-type="URL" data-id="https://arxiv.org/pdf/2102.09672.pdf">Alex Nichol and Prafulla Dhariwal</a></figcaption></figure> <p>Why does spending compute for quality work, though? Let&#8217;s pretend for a second that our ML models are humans (outlandish, I know). 
When a human sits down to draw a picture, they start with a general concept, e.g. &#8220;cat drinking a latte looking tired&#8221;. They might start by drawing the outline. Then they might add features (eyes, nose, clothes). Then the background. Then the fur. Next might come some color, one feature at a time. Details are slowly added, little by little. We don&#8217;t think of it as such, but we are trading compute for quality. The more time we engage our brains in refining the image, the better the image becomes. </p> <p>It stands to reason that ML models might be able to exploit this same paradigm, and I think the success of autoregression and diffusion is a strong signal that this might be &#8220;the way&#8221;.</p> <h2>Gradient Descent at Inference Time</h2> <p>I think we can all agree that the mechanisms of gradient descent are quite magical. At its core, it is so simple: compute the derivative of an error function. Use it to make microscopic adjustments to a couple million variables, initialized randomly. Repeat a few thousand times. But the end result is profound: beat out every expert model humans have come up with in the last century.</p> <p>What if we went meta on this? What if we trained models that learned the learning process itself? That build error functions from pure text and self-optimize? What if I told you that this is what diffusion models are? 🙂</p> <p>To make this analogy fit, let&#8217;s pretend that an image is actually a set of neural network weights. We initialize them randomly. We then use a neural network to predict the &#8220;update&#8221; for these &#8220;weights&#8221;, conditioned on some text. We take a small step in the direction that the neural network tells us is correct. We repeat this a few hundred times. Out comes an image. Or a MEL spectrogram. Or a latent. Sound familiar?</p> <h2>Means and Modes</h2> <p>ML models are largely statistical beasts, and thus it is often useful to think of them from that perspective. 
Taking this direction, generative models can be seen as learned mapping functions that morph one statistical distribution (most often Gaussian noise) into another (the data distribution). This mapping is generally guided using information like text (called &#8220;conditioning information&#8221;).</p> <p>A mapping function like this can learn to predict any statistical property we can formulate a loss function around. The most convenient property to target is the mean of the data distribution, which is modeled with the L2/MSE or L1/MAE loss.</p> <p>Here lies a problem, though: the mean of many data distributions we might care about is meaningless (pun intended). For example, the mean of the waveforms of all music is close to 0 at all locations. The mean of all RGB pixels is a fuzzy greyish brown blob. Even with a conditioning vector, the mean of all possible &#8220;brown dogs walking in the park on pink leashes with sky in the background&#8221; images will be a messy blur because many aspects of the above description are unconstrained: where is the dog in the image? Where is the owner? Is there grass and what color is it? A perfect mean of all these factors is a blurry image at best, and more than likely just a solid color gradient; brown to blue.</p> <p>A more appropriate loss target is the modes of a distribution. A quick refresher on modes:</p> <ul><li>The mean of a distribution is the average of all values in that distribution</li><li>The median is the center-most value in the distribution</li><li>The mode is the most frequently occurring value in the distribution</li></ul> <p> <em>Quick tangent: While the mode is technically the most frequent value of a distribution, what we are really interested in is the &#8220;most frequent &lt;n&gt; values in a distribution&#8221;. 
This is much more interesting for generative models since a picture of the same dog is pretty boring, but a picture of every dog breed doing different things that dogs actually do is pretty interesting. When I say &#8220;mode&#8221; past this point, I am referring to &#8220;the most frequent &lt;n&gt; values in a distribution&#8221;.</em></p> <p>The distributions we are working with exist in an unknowably complicated high-dimensional space. They are inherently multi-modal &#8211; which is to say that they contain millions, possibly billions of high density regions which we can consider &#8220;modes&#8221;. If we could build a generative model that can generate samples from these modes given conditioning information, we&#8217;d have something very interesting on our hands!</p> <p>The trouble with targeting modes for generative models is that they cannot be defined as a continuous function such that they serve as a good loss function for gradient descent. Instead, you have to target them indirectly.</p> <h3>GANs</h3> <p>You&#8217;ve probably heard of GANs, or Generative Adversarial Networks. These types of NNs use a brilliant loss function which results in a generative model that targets the modes of a distribution: a &#8220;generator&#8221; network maps random noise and a conditioning input to &#8220;fake&#8221; data. A &#8220;discriminator&#8221; network classifies inputs as &#8220;real&#8221; or &#8220;fake&#8221;. An interplay between these two networks is set up which causes the generator to converge on the modes of the data distribution (since producing samples from these modes would be indistinguishable from the &#8220;real&#8221; distribution). You&#8217;ve probably also heard about the problem with GANs: &#8220;mode collapse&#8221;. This is when the generator learns to produce samples from a single mode, thus permanently &#8220;winning&#8221; the game it plays with the discriminator. 
</p> <h3>W(here)TF are you going with this?</h3> <p>Diffusion models are another clever way of indirectly learning the modes of a distribution! Here&#8217;s how you can think of this:</p> <ol><li>Given pure noise and a conditioning input, a generator applies a tiny shift in the distribution towards its perceived mean (which, as stated above, is likely an amorphous blob of brown/grey colors).</li><li>Given the output of (1), the generator applies another tiny shift in the distribution towards its newly perceived mean, which is now a slightly less amorphous blob of boring color.</li><li>Rinse and repeat (2) until there is no noise left.</li></ol> <p>Think about what happens as you work through the diffusion process: every time you take a step, you are throwing out potential values that the final image could take on. This changes the distribution and shifts the mean in the process. The result of this process is that the iterative &#8220;mean&#8221; starts to converge on a &#8220;mode&#8221;. I&#8217;ve sketched this out for you below (the bars in each step are the parts of the distribution getting cut off, the dashed line is the resulting mean):</p> <figure class="wp-block-image size-full"><img loading="lazy" width="1167" height="1085" src="https://nonint.com/wp-content/uploads/2022/10/tortoise-P6-1.png" alt="" class="wp-image-281"/></figure> <p>We&#8217;ve found a way to optimize for the mean (a totally tractable continuous function), but actually converge on a mode! This is the reason why diffusion models produce such &#8220;realistic&#8221; outputs: they are mapping noise+conditioning to data elements that actually exist (or should exist) in the underlying data distribution.</p> <p>BTW &#8211; this is also a reason why diffusion models are fairly dangerous to deploy: they have a tendency to &#8220;regurgitate&#8221; information they have seen in the training dataset. E.g. 
a face seen too often will become a &#8220;mode&#8221; that starts to show up in the model outputs. The same goes for artistic styles, voices, musical beats, etc.</p> <h2>Spectral Bands</h2> <p>One interesting way to think about what diffusion is doing is in terms of frequency analysis. Most engineering folks learned in college that all information can be expressed as sums of sine waves of varying frequencies, amplitudes and phases. </p> <p>The diffusion process can be thought of as recovering these signals, from the low frequencies to the high. Early on, the strong noising means that only vague &#8220;details&#8221; corresponding to low frequency information emerge. As the process continues, details from the higher frequency bands begin to emerge.</p> <p>This is a useful way of understanding diffusion because it highlights one of its strengths: most generative methods have a propensity to spend too much &#8220;time&#8221; optimizing high frequency details. Low frequency information only emerges by scaling the models. This is problematic because the human brain works in the opposite direction: we care about semantics (low frequencies) above all else. Who cares how finely a horse&#8217;s hair is rendered when it was given 5 legs, for example?</p> <p>Diffusion not only directly optimizes low frequency details, it also offers us a way to optimize them and measure them: you simply spend more time optimizing the losses in the highly noised quantiles of the diffusion process. This is what OpenAI does with their cosine timestep schedule, as an example.</p> <p><br>That&#8217;s all I&#8217;ve got for now. 
I&#8217;m sure I&#8217;m missing a few ways to grok these exciting monstrosities, but I hope you&#8217;ve walked away with a few things to think about!</p> </div><!-- .entry-content --> </article><!-- #post-270 --> </main><!-- #main --> </div><!-- #primary --> </div> </div><!-- #content --> <footer id="colophon" class="site-footer clearfix"> <div class="content-wrap"> </div> <div class="site-info"> &copy; 2024 Non_Interactive &#8211; Software &amp; ML <!-- Delete below lines to remove copyright from footer --> <span class="footer-info-right"> | Powered by <a href="https://superbthemes.com/minimalistblogger/">Minimalist Blog</a> WordPress Theme </span> <!-- Delete above lines to remove copyright from footer --> </div><!-- .site-info --> </div> </footer> </div> <!-- Off canvas menu overlay, delete to remove dark shadow --> <div id="smobile-menu" class="mobile-only"></div> <div id="mobile-menu-overlay"></div> <script type='text/javascript' src='https://nonint.com/wp-content/themes/minimalistblogger/js/navigation.js?ver=20170823' id='minimalistblogger-navigation-js'></script> <script type='text/javascript' src='https://nonint.com/wp-content/themes/minimalistblogger/js/skip-link-focus-fix.js?ver=20170823' id='minimalistblogger-skip-link-focus-fix-js'></script> <script type='text/javascript' src='https://nonint.com/wp-content/themes/minimalistblogger/js/script.js?ver=20160720' id='minimalistblogger-script-js'></script> <script type='text/javascript' src='https://nonint.com/wp-content/themes/minimalistblogger/js/jquery.flexslider.js?ver=20150423' id='flexslider-js'></script> <script type='text/javascript' src='https://nonint.com/wp-content/themes/minimalistblogger/js/accessibility.js?ver=20160720' id='minimalistblogger-accessibility-js'></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10