<!-- CINXE.COM -->
<!-- Text-to-image model - Wikipedia -->
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<title>Text-to-image model - Wikipedia</title>
<script>
// Replace the root element's class list with its "client-js" counterpart and apply any
// per-user overrides found in the enwikimwclientpreferences cookie.
(function () {
  var className = "client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";
  var cookie = document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);
  if (cookie) {
    // The cookie value is split on '%2C'; each entry replaces the class that shares its
    // "<feature>-clientpref-" prefix.
    cookie[1].split('%2C').forEach(function (pref) {
      className = className.replace(
        new RegExp('(^| )' + pref.replace(/-clientpref-\w+$|[^\w-]+/g, '') + '-clientpref-\\w+( |$)'),
        '$1' + pref + '$2'
      );
    });
  }
  document.documentElement.className = className;
}());

// Page configuration object. Every string literal must stay on a single line: a raw line
// break inside a quoted string is a syntax error that would abort this whole script.
RLCONF = {
  "wgBreakFrames": false,
  "wgSeparatorTransformTable": ["", ""],
  "wgDigitTransformTable": ["", ""],
  "wgDefaultDateFormat": "dmy",
  "wgMonthNames": ["", "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"],
  "wgRequestId": "5a442efb-8929-425e-8d18-4fe6ffea0dfc",
  "wgCanonicalNamespace": "",
  "wgCanonicalSpecialPageName": false,
  "wgNamespaceNumber": 0,
  "wgPageName": "Text-to-image_model",
  "wgTitle": "Text-to-image model",
  "wgCurRevisionId": 1258324969,
  "wgRevisionId": 1258324969,
  "wgArticleId": 71701751,
  "wgIsArticle": true,
  "wgIsRedirect": false,
  "wgAction": "view",
  "wgUserName": null,
  "wgUserGroups": ["*"],
  "wgCategories": ["Articles with short description", "Short description matches Wikidata", "Use mdy dates from November 2024", "All articles with vague or ambiguous time", "Vague or ambiguous time from October 2024", "Pages using multiple image with auto scaled images", "Articles with excerpts", "Text-to-image generation"],
  "wgPageViewLanguage": "en",
  "wgPageContentLanguage": "en",
  "wgPageContentModel": "wikitext",
  "wgRelevantPageName": "Text-to-image_model",
  "wgRelevantArticleId": 71701751,
  "wgIsProbablyEditable": true,
  "wgRelevantPageIsProbablyEditable": true,
  "wgRestrictionEdit": [],
  "wgRestrictionMove": [],
  "wgNoticeProject": "wikipedia",
  "wgCiteReferencePreviewsActive": false,
  "wgFlaggedRevsParams": {"tags": {"status": {"levels": 1}}},
  "wgMediaViewerOnClick": true,
  "wgMediaViewerEnabledByDefault": true,
  "wgPopupsFlags": 0,
  "wgVisualEditor": {"pageLanguageCode": "en", "pageLanguageDir": "ltr", "pageVariantFallbacks": "en"},
  "wgMFDisplayWikibaseDescriptions": {"search": true, "watchlist": true, "tagline": false, "nearby": true},
  "wgWMESchemaEditAttemptStepOversample": false,
  "wgWMEPageLength": 20000,
  "wgRelatedArticlesCompat": [],
  "wgCentralAuthMobileDomain": false,
  "wgEditSubmitButtonLabelPublish": true,
  "wgULSPosition": "interlanguage",
  "wgULSisCompactLinksEnabled": false,
  "wgVector2022LanguageInHeader": true,
  "wgULSisLanguageSelectorEmpty": false,
  "wgWikibaseItemId": "Q113940039",
  "wgCheckUserClientHintsHeadersJsApi": ["brands", "architecture", "bitness", "fullVersionList", "mobile", "model", "platform", "platformVersion"],
  "GEHomepageSuggestedEditsEnableTopics": true,
  "wgGETopicsMatchModeEnabled": false,
  "wgGEStructuredTaskRejectionReasonTextInputEnabled": false,
  "wgGELevelingUpEnabledForUser": false
};

// Module name -> state string map.
RLSTATE = {
  "ext.globalCssJs.user.styles": "ready",
  "site.styles": "ready",
  "user.styles": "ready",
  "ext.globalCssJs.user": "ready",
  "user": "ready",
  "user.options": "loading",
  "ext.cite.styles": "ready",
  "skins.vector.search.codex.styles": "ready",
  "skins.vector.styles": "ready",
  "skins.vector.icons": "ready",
  "jquery.tablesorter.styles": "ready",
  "jquery.makeCollapsible.styles": "ready",
  "ext.wikimediamessages.styles": "ready",
  "ext.visualEditor.desktopArticleTarget.noscript": "ready",
  "ext.uls.interlanguage": "ready",
  "wikibase.client.init": "ready",
  "ext.wikimediaBadges": "ready"
};

// List of module names for this page.
RLPAGEMODULES = [
  "ext.cite.ux-enhancements",
  "mediawiki.page.media",
  "site",
  "mediawiki.page.ready",
  "jquery.tablesorter",
  "jquery.makeCollapsible",
  "mediawiki.toc",
  "skins.vector.js",
  "ext.centralNotice.geoIP",
  "ext.centralNotice.startUp",
  "ext.gadget.ReferenceTooltips",
  "ext.gadget.switcher",
  "ext.urlShortener.toolbar",
  "ext.centralauth.centralautologin",
  "mmv.bootstrap",
  "ext.popups",
  "ext.visualEditor.desktopArticleTarget.init",
  "ext.visualEditor.targetLoader",
  "ext.echo.centralauth",
  "ext.eventLogging",
  "ext.wikimediaEvents",
  "ext.navigationTiming",
  "ext.uls.interface",
  "ext.cx.eventlogging.campaigns",
  "ext.cx.uls.quick.actions",
  "wikibase.client.vector-2022",
  "ext.checkUser.clientHints",
  "ext.quicksurveys.init",
  "ext.growthExperiments.SuggestedEditSession",
  "wikibase.sidebar.tracking"
];
</script>
<script>
// Queue a callback on RLQ (created here if it does not exist yet). When run, the callback
// hands mw.loader.impl a factory for "user.options@12s5i" whose body sets three tokens.
(RLQ = window.RLQ || []).push(function () {
  mw.loader.impl(function () {
    return ["user.options@12s5i", function ($, jQuery, require, module) {
      mw.user.tokens.set({"patrolToken": "+\\", "watchToken": "+\\", "csrfToken": "+\\"});
    }];
  });
});
</script>
<!-- The link tag opened on the last line below is completed by the href on the next source line. -->
<link rel="stylesheet"
href="/w/load.php?lang=en&amp;modules=ext.cite.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cjquery.tablesorter.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&amp;only=styles&amp;skin=vector-2022">
<!-- The line above closes the stylesheet link whose opening sits on the previous source line. -->
<!-- Ampersands inside URL attribute values are written as &amp; so the markup does not rely on parser error recovery. -->
<script async src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script>
<meta name="ResourceLoaderDynamicStyles" content="">
<link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022">
<meta name="generator" content="MediaWiki 1.44.0-wmf.4">
<!-- Two referrer policies are declared back to back; presumably the first is a fallback for clients that do not recognise the second. TODO confirm. -->
<meta name="referrer" content="origin">
<meta name="referrer" content="origin-when-cross-origin">
<meta name="robots" content="max-image-preview:standard">
<meta name="format-detection" content="telephone=no">
<!-- Open Graph preview image, offered three times with different declared dimensions. -->
<!-- NOTE(review): the first entry points at a 1024px thumbnail but declares 1200 by 1200; confirm against the generator. -->
<meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/1024px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="1200">
<meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/800px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png">
<meta property="og:image:width" content="800">
<meta property="og:image:height" content="800">
<meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/640px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png">
<meta property="og:image:width" content="640">
<meta property="og:image:height" content="640">
<meta name="viewport" content="width=1120">
<meta property="og:title" content="Text-to-image model - Wikipedia">
<meta property="og:type" content="website">
<!-- The link tag opened on the last line below is completed by the href on the next source line. -->
<link rel="preconnect"
href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Text-to-image_model"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Text-to-image_model&action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Text-to-image_model"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Text-to-image_model rootpage-Text-to-image_model skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button 
cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" 
class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" 
role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's 
font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Text-to-image+model" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Text-to-image+model" title="You're 
encouraged to log in; however, it's not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Text-to-image+model" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Text-to-image+model" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>History</span> </div> </a> <ul id="toc-History-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Architecture_and_training" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Architecture_and_training"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Architecture and training</span> </div> </a> <ul id="toc-Architecture_and_training-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Datasets" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Datasets"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Datasets</span> </div> </a> <ul id="toc-Datasets-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Quality_evaluation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Quality_evaluation"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Quality evaluation</span> </div> </a> <ul id="toc-Quality_evaluation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Impact_and_applications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" 
href="#Impact_and_applications"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Impact and applications</span> </div> </a> <ul id="toc-Impact_and_applications-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-List_of_notable_text-to-image_models" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#List_of_notable_text-to-image_models"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>List of notable text-to-image models</span> </div> </a> <ul id="toc-List_of_notable_text-to-image_models-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Explanatory_notes" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Explanatory_notes"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>Explanatory notes</span> </div> </a> <ul id="toc-Explanatory_notes-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input 
type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Text-to-image model</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
Available in 14 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-14" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">14 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ar mw-list-item"><a href="https://ar.wikipedia.org/wiki/%D9%86%D9%85%D9%88%D8%B0%D8%AC_%D8%AA%D9%88%D9%84%D9%8A%D8%AF_%D8%A7%D9%84%D8%B5%D9%88%D8%B1_%D9%85%D9%86_%D8%A7%D9%84%D9%86%D8%B5" title="نموذج توليد الصور من النص – Arabic" lang="ar" hreflang="ar" data-title="نموذج توليد الصور من النص" data-language-autonym="العربية" data-language-local-name="Arabic" class="interlanguage-link-target"><span>العربية</span></a></li><li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Model_de_text_a_imatge" title="Model de text a imatge – Catalan" lang="ca" hreflang="ca" data-title="Model de text a imatge" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D9%85%D8%AF%D9%84_%D9%85%D8%AA%D9%86_%D8%A8%D9%87_%D8%AA%D8%B5%D9%88%DB%8C%D8%B1" title="مدل متن به تصویر – Persian" lang="fa" hreflang="fa" data-title="مدل متن به تصویر" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Prompt_art" title="Prompt art – French" lang="fr" hreflang="fr" data-title="Prompt art" data-language-autonym="Français" 
data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-gl mw-list-item"><a href="https://gl.wikipedia.org/wiki/Modelo_de_texto_a_imaxe" title="Modelo de texto a imaxe – Galician" lang="gl" hreflang="gl" data-title="Modelo de texto a imaxe" data-language-autonym="Galego" data-language-local-name="Galician" class="interlanguage-link-target"><span>Galego</span></a></li><li class="interlanguage-link interwiki-id mw-list-item"><a href="https://id.wikipedia.org/wiki/Model_teks-ke-gambar" title="Model teks-ke-gambar – Indonesian" lang="id" hreflang="id" data-title="Model teks-ke-gambar" data-language-autonym="Bahasa Indonesia" data-language-local-name="Indonesian" class="interlanguage-link-target"><span>Bahasa Indonesia</span></a></li><li class="interlanguage-link interwiki-he mw-list-item"><a href="https://he.wikipedia.org/wiki/%D7%9E%D7%97%D7%95%D7%9C%D7%9C_%D7%AA%D7%9E%D7%95%D7%A0%D7%95%D7%AA" title="מחולל תמונות – Hebrew" lang="he" hreflang="he" data-title="מחולל תמונות" data-language-autonym="עברית" data-language-local-name="Hebrew" class="interlanguage-link-target"><span>עברית</span></a></li><li class="interlanguage-link interwiki-ps mw-list-item"><a href="https://ps.wikipedia.org/wiki/%D9%85%D8%AA%D9%86_%D8%AF_%D8%A7%D9%86%DA%81%D9%88%D8%B1_%D9%BE%D9%87_%D8%A8%DA%BC%D9%87" title="متن د انځور په بڼه – Pashto" lang="ps" hreflang="ps" data-title="متن د انځور په بڼه" data-language-autonym="پښتو" data-language-local-name="Pashto" class="interlanguage-link-target"><span>پښتو</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/Modelo_de_texto_para_imagem" title="Modelo de texto para imagem – Portuguese" lang="pt" hreflang="pt" data-title="Modelo de texto para imagem" data-language-autonym="Português" data-language-local-name="Portuguese" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link 
interwiki-sr mw-list-item"><a href="https://sr.wikipedia.org/wiki/Model_teksta_u_sliku" title="Model teksta u sliku – Serbian" lang="sr" hreflang="sr" data-title="Model teksta u sliku" data-language-autonym="Српски / srpski" data-language-local-name="Serbian" class="interlanguage-link-target"><span>Српски / srpski</span></a></li><li class="interlanguage-link interwiki-fi mw-list-item"><a href="https://fi.wikipedia.org/wiki/Kuvageneraattori" title="Kuvageneraattori – Finnish" lang="fi" hreflang="fi" data-title="Kuvageneraattori" data-language-autonym="Suomi" data-language-local-name="Finnish" class="interlanguage-link-target"><span>Suomi</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/Metinden_resme_model" title="Metinden resme model – Turkish" lang="tr" hreflang="tr" data-title="Metinden resme model" data-language-autonym="Türkçe" data-language-local-name="Turkish" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E6%96%87%E6%9C%AC%E5%88%B0%E5%9C%96%E5%83%8F%E7%94%9F%E6%88%90%E6%A8%A1%E5%9E%8B" title="文本到圖像生成模型 – Cantonese" lang="yue" hreflang="yue" data-title="文本到圖像生成模型" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E6%96%87%E6%9C%AC%E5%88%B0%E5%9B%BE%E5%83%8F%E7%94%9F%E6%88%90%E6%A8%A1%E5%9E%8B" title="文本到图像生成模型 – Chinese" lang="zh" hreflang="zh" data-title="文本到图像生成模型" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q113940039#sitelinks-wikipedia" title="Edit interlanguage links" 
class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Text-to-image_model" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Text-to-image_model" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a 
href="/wiki/Text-to-image_model"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Text-to-image_model&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Text-to-image_model&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" 
data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Text-to-image_model"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Text-to-image_model&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Text-to-image_model&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Text-to-image_model" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Text-to-image_model" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Text-to-image_model&oldid=1258324969" title="Permanent link to this 
revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Text-to-image_model&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Text-to-image_model&id=1258324969&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FText-to-image_model"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FText-to-image_model"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Text-to-image_model&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Text-to-image_model&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="wb-otherproject-link wb-otherproject-commons mw-list-item"><a href="https://commons.wikimedia.org/wiki/Category:Text-to-image_generation" hreflang="en"><span>Wikimedia Commons</span></a></li><li 
id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q113940039" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div 
class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Machine learning model</div> <p class="mw-empty-elt"> </p> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Astronaut_Riding_a_Horse_Hiroshige_(SD3.5).webp" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/220px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png" decoding="async" width="220" height="220" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/330px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/36/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp/440px-Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp.png 2x" data-file-width="1024" data-file-height="1024" /></a><figcaption>An image conditioned on the prompt "an astronaut riding a horse, by <a href="/wiki/Hiroshige" title="Hiroshige">Hiroshige</a>", generated by <a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a> 3.5, a large-scale text-to-image model first released in 2022</figcaption></figure> <p>A <b>text-to-image model</b> is a <a href="/wiki/Machine_learning_model" class="mw-redirect" title="Machine learning model">machine learning model</a> which takes an input <a href="/wiki/Natural_language" title="Natural language">natural language</a> description and produces an image matching that description. </p><p>Text-to-image models began to be developed in the mid-2010s during the beginnings of the <a href="/wiki/AI_boom" title="AI boom">AI boom</a>, as a result of advances in <a href="/wiki/Deep_learning" title="Deep learning">deep neural networks</a>. 
In 2022, the output of state-of-the-art text-to-image models—such as OpenAI's <a href="/wiki/DALL-E_2" class="mw-redirect" title="DALL-E 2">DALL-E 2</a>, <a href="/wiki/Google_Brain" title="Google Brain">Google Brain</a>'s <a href="/wiki/Google_Brain#Text-to-image_model" title="Google Brain">Imagen</a>, Stability AI's <a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a>, and <a href="/wiki/Midjourney" title="Midjourney">Midjourney</a>—began to be considered to approach the quality of <a href="/wiki/Photograph" title="Photograph">real photographs</a> and human-drawn <a href="/wiki/Folk_art" title="Folk art">art</a>. </p><p>Text-to-image models are generally <a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">latent diffusion models</a>, which combine a <a href="/wiki/Language_model" title="Language model">language model</a>, which transforms the input text into a <a href="/wiki/Latent_variable_model" title="Latent variable model">latent representation</a>, and a <a href="/wiki/Generative_model" title="Generative model">generative image model</a>, which produces an image conditioned on that representation. 
The most effective models have generally been trained on massive amounts of image and text data <a href="/wiki/Web_scraping" title="Web scraping">scraped from the web</a>.<sup id="cite_ref-imagen-verge_1-0" class="reference"><a href="#cite_note-imagen-verge-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="History">History</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=1" title="Edit section: History"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Before the rise of <a href="/wiki/Deep_learning" title="Deep learning">deep learning</a>,<sup class="noprint Inline-Template" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Chronological_items" title="Wikipedia:Manual of Style/Dates and numbers"><span title="The time period mentioned near this tag is ambiguous. 
(October 2024)">when?</span></a></i>]</sup> attempts to build text-to-image models were limited to <a href="/wiki/Collage" title="Collage">collages</a> by arranging existing component images, such as from a database of <a href="/wiki/Clip_art" title="Clip art">clip art</a>.<sup id="cite_ref-agnese_2-0" class="reference"><a href="#cite_note-agnese-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-zhu-2007_3-0" class="reference"><a href="#cite_note-zhu-2007-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> </p><p>The inverse task, <a href="/wiki/Natural_language_generation" title="Natural language generation">image captioning</a>, was more tractable, and a number of image captioning deep learning models came prior to the first text-to-image models.<sup id="cite_ref-mansimov-2015_4-0" class="reference"><a href="#cite_note-mansimov-2015-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> </p><p>The first modern text-to-image model, alignDRAW, was introduced in 2015 by researchers from the <a href="/wiki/University_of_Toronto" title="University of Toronto">University of Toronto</a>. 
alignDRAW extended the previously-introduced DRAW architecture (which used a <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">recurrent</a> <a href="/wiki/Variational_autoencoder" title="Variational autoencoder">variational autoencoder</a> with an <a href="/wiki/Attention_mechanism" class="mw-redirect" title="Attention mechanism">attention mechanism</a>) to be conditioned on text sequences.<sup id="cite_ref-mansimov-2015_4-1" class="reference"><a href="#cite_note-mansimov-2015-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> Images generated by alignDRAW were in small <a href="/wiki/Image_resolution" title="Image resolution">resolution</a> (32×32 pixels, attained from <a href="/wiki/Image_scaling" title="Image scaling">resizing</a>) and were considered to be 'low in diversity'. The model was able to generalize to objects not represented in the training data (such as a red school bus) and appropriately handled novel prompts such as "a stop sign is flying in blue skies", demonstrating that it was not merely "memorizing" data from the <a href="/wiki/Data_set" title="Data set">training set</a>.<sup id="cite_ref-mansimov-2015_4-2" class="reference"><a href="#cite_note-mansimov-2015-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-reed-2016_5-0" class="reference"><a href="#cite_note-reed-2016-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> </p> <div class="thumb tright" style=""><div class="thumbinner" style="width:270px"><div class="thumbimage noresize" style="width:268px;"> <span typeof="mw:File"><a href="/wiki/File:AlignDRAW_-_Flying_stop_sign.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/4/48/AlignDRAW_-_Flying_stop_sign.png" decoding="async" width="268" height="132" class="mw-file-element" data-file-width="134" data-file-height="66" /></a></span></div><div 
class="thumbcaption">Eight images generated from the text prompt "A stop sign is flying in blue skies." by AlignDRAW (2015). Enlarged to show detail.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup></div></div></div> <p>In 2016, Reed, Akata, Yan et al. became the first to use <a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">generative adversarial networks</a> for the text-to-image task.<sup id="cite_ref-reed-2016_5-1" class="reference"><a href="#cite_note-reed-2016-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-frolov_7-0" class="reference"><a href="#cite_note-frolov-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> With models trained on narrow, domain-specific datasets, they were able to generate "visually plausible" images of birds and flowers from text captions like <i>"an all black bird with a distinct thick, rounded bill"</i>. A model trained on the more diverse <a href="/wiki/COCO_(dataset)" class="mw-redirect" title="COCO (dataset)">COCO</a> (Common Objects in Context) dataset produced images which were "from a distance... 
encouraging", but which lacked coherence in their details.<sup id="cite_ref-reed-2016_5-2" class="reference"><a href="#cite_note-reed-2016-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> Later systems include VQGAN-CLIP,<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> XMC-GAN, and GauGAN2.<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup> </p> <style data-mw-deduplicate="TemplateStyles:r1237032888/mw-parser-output/.tmulti">.mw-parser-output .tmulti .multiimageinner{display:flex;flex-direction:column}.mw-parser-output .tmulti .trow{display:flex;flex-direction:row;clear:left;flex-wrap:wrap;width:100%;box-sizing:border-box}.mw-parser-output .tmulti .tsingle{margin:1px;float:left}.mw-parser-output .tmulti .theader{clear:both;font-weight:bold;text-align:center;align-self:center;background-color:transparent;width:100%}.mw-parser-output .tmulti .thumbcaption{background-color:transparent}.mw-parser-output .tmulti .text-align-left{text-align:left}.mw-parser-output .tmulti .text-align-right{text-align:right}.mw-parser-output .tmulti .text-align-center{text-align:center}@media all and (max-width:720px){.mw-parser-output .tmulti .thumbinner{width:100%!important;box-sizing:border-box;max-width:none!important;align-items:center}.mw-parser-output .tmulti .trow{justify-content:center}.mw-parser-output .tmulti .tsingle{float:none!important;max-width:100%!important;box-sizing:border-box;text-align:center}.mw-parser-output .tmulti .tsingle .thumbcaption{text-align:left}.mw-parser-output .tmulti .trow>.thumbcaption{text-align:center}}@media screen{html.skin-theme-clientpref-night .mw-parser-output .tmulti .multiimageinner img{background-color:white}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .tmulti 
.multiimageinner img{background-color:white}}</style><div class="thumb tmulti tright"><div class="thumbinner multiimageinner" style="width:242px;max-width:242px"><div class="trow"><div class="tsingle" style="width:119px;max-width:119px"><div class="thumbimage" style="height:117px;overflow:hidden"><span typeof="mw:File"><a href="/wiki/File:Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png" class="mw-file-description"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/91/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png/117px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png" decoding="async" width="117" height="117" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/91/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png/176px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/91/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png/234px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-03.png 2x" data-file-width="1024" data-file-height="1024" /></a></span></div></div><div class="tsingle" style="width:119px;max-width:119px"><div class="thumbimage" style="height:117px;overflow:hidden"><span typeof="mw:File"><a href="/wiki/File:Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png" class="mw-file-description"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/6/6e/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png/117px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png" decoding="async" width="117" height="117" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/6/6e/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png/176px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png 1.5x, 
//upload.wikimedia.org/wikipedia/commons/thumb/6/6e/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png/234px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e2-01.png 2x" data-file-width="1024" data-file-height="1024" /></a></span></div></div></div><div class="trow"><div class="tsingle" style="width:119px;max-width:119px"><div class="thumbimage" style="height:117px;overflow:hidden"><span typeof="mw:File"><a href="/wiki/File:Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png" class="mw-file-description"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png/117px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png" decoding="async" width="117" height="117" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png/176px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png/234px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-01.png 2x" data-file-width="1024" data-file-height="1024" /></a></span></div></div><div class="tsingle" style="width:119px;max-width:119px"><div class="thumbimage" style="height:117px;overflow:hidden"><span typeof="mw:File"><a href="/wiki/File:Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png" class="mw-file-description"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png/117px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png" decoding="async" width="117" height="117" class="mw-file-element" 
srcset="//upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png/176px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png/234px-Image_generator-A_stop_sign_is_flying_in_blue_skies-Dall-e3-02.png 2x" data-file-width="1024" data-file-height="1024" /></a></span></div></div></div><div class="trow" style="display:flex"><div class="thumbcaption">DALL·E 2's (top, April 2022) and DALL·E 3's (bottom, September 2023) generated images for the prompt "A stop sign is flying in blue skies"</div></div></div></div> <p>One of the first text-to-image models to capture widespread public attention was <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a>'s <a href="/wiki/DALL-E" title="DALL-E">DALL-E</a>, a <a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">transformer</a> system announced in January 2021.<sup id="cite_ref-tc-dalle_10-0" class="reference"><a href="#cite_note-tc-dalle-10"><span class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup> A successor capable of generating more complex and realistic images, DALL-E 2, was unveiled in April 2022,<sup id="cite_ref-tc-dalle-2_11-0" class="reference"><a href="#cite_note-tc-dalle-2-11"><span class="cite-bracket">[</span>11<span class="cite-bracket">]</span></a></sup> followed by <a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a>, which was publicly released in August 2022.<sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> In August 2022, <a href="/wiki/Text-to-image_personalization" title="Text-to-image personalization">text-to-image personalization</a> was introduced, allowing the model to be taught a new concept using a small set of images of a new 
object that was not included in the training set of the text-to-image foundation model. This is achieved by <a href="/wiki/Text-to-image_personalization" title="Text-to-image personalization">textual inversion</a>, namely, finding a new text term that corresponds to these images. </p><p>Following other text-to-image models, <a href="/wiki/Language_model" title="Language model">language model</a>-powered <a href="/wiki/Text-to-video" class="mw-redirect" title="Text-to-video">text-to-video</a> platforms such as Runway, Make-A-Video,<sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span class="cite-bracket">[</span>13<span class="cite-bracket">]</span></a></sup> Imagen Video,<sup id="cite_ref-14" class="reference"><a href="#cite_note-14"><span class="cite-bracket">[</span>14<span class="cite-bracket">]</span></a></sup> Midjourney,<sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup> and Phenaki<sup id="cite_ref-16" class="reference"><a href="#cite_note-16"><span class="cite-bracket">[</span>16<span class="cite-bracket">]</span></a></sup> can generate video from text and/or text/image prompts.<sup id="cite_ref-17" class="reference"><a href="#cite_note-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Architecture_and_training">Architecture and training</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=2" title="Edit section: Architecture and training"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:State_of_AI_Art_Machine_Learning_Models.svg" class="mw-file-description"><img 
src="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/State_of_AI_Art_Machine_Learning_Models.svg/220px-State_of_AI_Art_Machine_Learning_Models.svg.png" decoding="async" width="220" height="132" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/State_of_AI_Art_Machine_Learning_Models.svg/330px-State_of_AI_Art_Machine_Learning_Models.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/1/13/State_of_AI_Art_Machine_Learning_Models.svg/440px-State_of_AI_Art_Machine_Learning_Models.svg.png 2x" data-file-width="512" data-file-height="307" /></a><figcaption>High-level architecture showing the state of AI art machine learning models, and notable models and applications as a clickable SVG image map</figcaption></figure> <p>Text-to-image models have been built using a variety of architectures. The text encoding step may be performed with a <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">recurrent neural network</a> such as a <a href="/wiki/Long_short-term_memory" title="Long short-term memory">long short-term memory</a> (LSTM) network, though <a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">transformer</a> models have since become a more popular option. For the image generation step, conditional <a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">generative adversarial networks</a> (GANs) have been commonly used, with <a href="/wiki/Diffusion_model" title="Diffusion model">diffusion models</a> also becoming a popular option in recent years. Rather than directly training a model to output a high-resolution image conditioned on a text embedding, a popular technique is to train a model to generate low-resolution images, and use one or more auxiliary deep learning models to upscale them, filling in finer details. 
</p><p>Text-to-image models are trained on large datasets of (text, image) pairs, often scraped from the web. With their 2022 Imagen model, Google Brain reported positive results from using a <a href="/wiki/Large_language_model" title="Large language model">large language model</a> trained separately on a text-only corpus (with its weights subsequently frozen), a departure from the theretofore standard approach.<sup id="cite_ref-imagen-paper_18-0" class="reference"><a href="#cite_note-imagen-paper-18"><span class="cite-bracket">[</span>18<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Datasets">Datasets</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=3" title="Edit section: Datasets"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Captioned_image_dataset_examples.jpg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/0/0f/Captioned_image_dataset_examples.jpg/220px-Captioned_image_dataset_examples.jpg" decoding="async" width="220" height="275" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/0/0f/Captioned_image_dataset_examples.jpg/330px-Captioned_image_dataset_examples.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/0/0f/Captioned_image_dataset_examples.jpg/440px-Captioned_image_dataset_examples.jpg 2x" data-file-width="1770" data-file-height="2209" /></a><figcaption>Examples of images and captions from three public datasets which are commonly used to train text-to-image models</figcaption></figure> <p>Training a text-to-image model requires a dataset of images paired with text captions. One dataset commonly used for this purpose is the COCO dataset. 
Released by Microsoft in 2014, COCO consists of around 123,000 images depicting a diversity of objects with five captions per image, generated by human annotators. Oxford-120 Flowers and CUB-200 Birds are smaller datasets of around 10,000 images each, restricted to flowers and birds, respectively. It is considered less difficult to train a high-quality text-to-image model with these datasets because of their narrow range of subject matter.<sup id="cite_ref-frolov_7-1" class="reference"><a href="#cite_note-frolov-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Quality_evaluation">Quality evaluation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=4" title="Edit section: Quality evaluation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Evaluating and comparing the quality of text-to-image models is a problem involving assessing multiple desirable properties. A desideratum specific to text-to-image models is that generated images semantically align with the text captions used to generate them. A number of schemes have been devised for assessing these qualities, some automated and others based on human judgement.<sup id="cite_ref-frolov_7-2" class="reference"><a href="#cite_note-frolov-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> </p><p>A common algorithmic metric for assessing image quality and diversity is the <a href="/wiki/Inception_score" title="Inception score">Inception Score</a> (IS), which is based on the distribution of labels predicted by a pretrained <a href="/wiki/Inceptionv3" class="mw-redirect" title="Inceptionv3">Inceptionv3</a> <a href="/wiki/Computer_vision" title="Computer vision">image classification model</a> when applied to a sample of images generated by the text-to-image model. 
The score is increased when the image classification model predicts a single label with high probability, a scheme intended to favour "distinct" generated images. Another popular metric is the related <a href="/wiki/Fr%C3%A9chet_inception_distance" title="Fréchet inception distance">Fréchet inception distance</a>, which compares the distribution of generated images and real training images according to features extracted by one of the final layers of a pretrained image classification model.<sup id="cite_ref-frolov_7-3" class="reference"><a href="#cite_note-frolov-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Impact_and_applications">Impact and applications</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=5" title="Edit section: Impact and applications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="excerpt-block"><style data-mw-deduplicate="TemplateStyles:r1066933788">.mw-parser-output .excerpt-hat .mw-editsection-like{font-style:normal}</style><style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable dablink excerpt-hat selfref">This section is an excerpt from <a href="/wiki/Artificial_intelligence_art#Impact_and_applications" title="Artificial intelligence art">Artificial intelligence art § Impact and applications</a>.<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span><a class="external text" 
href="https://en.wikipedia.org/w/index.php?title=Artificial_intelligence_art&action=edit">edit</a><span class="mw-editsection-bracket">]</span></span></div><div class="excerpt"> AI has the potential for a <a href="/wiki/Societal_transformation" title="Societal transformation">societal transformation</a>, which may include enabling the expansion of noncommercial niche genres (such as <a href="/wiki/Cyberpunk_derivatives" title="Cyberpunk derivatives">cyberpunk derivatives</a> like <a href="/wiki/Solarpunk" title="Solarpunk">solarpunk</a>) by amateurs, novel entertainment, fast prototyping,<sup id="cite_ref-Artificial_intelligence_art_computerworld_19-0" class="reference"><a href="#cite_note-Artificial_intelligence_art_computerworld-19"><span class="cite-bracket">[</span>19<span class="cite-bracket">]</span></a></sup> increasing art-making accessibility,<sup id="cite_ref-Artificial_intelligence_art_computerworld_19-1" class="reference"><a href="#cite_note-Artificial_intelligence_art_computerworld-19"><span class="cite-bracket">[</span>19<span class="cite-bracket">]</span></a></sup> and artistic output per effort and/or expenses and/or time<sup id="cite_ref-Artificial_intelligence_art_computerworld_19-2" class="reference"><a href="#cite_note-Artificial_intelligence_art_computerworld-19"><span class="cite-bracket">[</span>19<span class="cite-bracket">]</span></a></sup>—e.g., via generating drafts, draft refinements, and image components (<a href="/wiki/Inpainting" title="Inpainting">inpainting</a>). 
Generated images are sometimes used as sketches,<sup id="cite_ref-Artificial_intelligence_art_nytimesRoose_20-0" class="reference"><a href="#cite_note-Artificial_intelligence_art_nytimesRoose-20"><span class="cite-bracket">[</span>20<span class="cite-bracket">]</span></a></sup> low-cost experiments,<sup id="cite_ref-Artificial_intelligence_art_CNBCLeswing_21-0" class="reference"><a href="#cite_note-Artificial_intelligence_art_CNBCLeswing-21"><span class="cite-bracket">[</span>21<span class="cite-bracket">]</span></a></sup> inspiration, or illustrations of <a href="/wiki/Proof-of-concept" class="mw-redirect" title="Proof-of-concept">proof-of-concept</a>-stage ideas. Additional functionalities or improvements may also relate to post-generation manual editing (i.e., polishing), such as subsequent tweaking with an image editor.<sup id="cite_ref-Artificial_intelligence_art_CNBCLeswing_21-1" class="reference"><a href="#cite_note-Artificial_intelligence_art_CNBCLeswing-21"><span class="cite-bracket">[</span>21<span class="cite-bracket">]</span></a></sup></div></div> <div class="mw-heading mw-heading2"><h2 id="List_of_notable_text-to-image_models">List of notable text-to-image models</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=6" title="Edit section: List of notable text-to-image models"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable sortable"> <caption> </caption> <tbody><tr> <th>Name </th> <th>Release date </th> <th>Developer </th> <th>License </th></tr> <tr> <td><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a> </td> <td>January 2021 </td> <td rowspan="3"><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a> </td> <td rowspan="10">Proprietary </td></tr> <tr> <td><a href="/wiki/DALL-E" title="DALL-E">DALL-E 2</a> </td> <td>April 2022 </td></tr> <tr> <td><a href="/wiki/DALL-E" title="DALL-E">DALL-E 3</a> </td> 
<td>September 2023 </td></tr> <tr> <td><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram 2.0</a> </td> <td>August 2024 </td> <td>Ideogram </td></tr> <tr> <td><a href="/wiki/Google_Brain#Text-to-image_model" title="Google Brain">Imagen</a> </td> <td>April 2023 </td> <td rowspan="4"><a href="/wiki/Google" title="Google">Google</a> </td></tr> <tr> <td>Imagen 2 </td> <td>December 2023<sup id="cite_ref-22" class="reference"><a href="#cite_note-22"><span class="cite-bracket">[</span>22<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td>Imagen 3 </td> <td>May 2024 </td></tr> <tr> <td><a href="/wiki/Google_Brain#Text-to-image_model" title="Google Brain">Parti</a> </td> <td>Unreleased </td></tr> <tr> <td><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a> </td> <td>March 2023 </td> <td><a href="/wiki/Adobe_Inc." title="Adobe Inc.">Adobe Inc.</a> </td></tr> <tr> <td><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a> </td> <td>July 2022 </td> <td>Midjourney, Inc. 
</td></tr> <tr> <td><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a> </td> <td>August 2022 </td> <td>Stability AI </td> <td><a rel="nofollow" class="external text" href="https://stability.ai/community-license-agreement">Stability AI Community License</a><sup id="cite_ref-23" class="reference"><a href="#cite_note-23"><span class="cite-bracket">[</span>note 1<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a> </td> <td>August 2024 </td> <td>Black Forest Labs </td> <td><a href="/wiki/Apache_License" title="Apache License">Apache License</a><sup id="cite_ref-24" class="reference"><a href="#cite_note-24"><span class="cite-bracket">[</span>note 2<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td><a href="/wiki/RunwayML" class="mw-redirect" title="RunwayML">RunwayML</a> </td> <td>2018 </td> <td>Runway AI, Inc. </td> <td>Proprietary </td></tr></tbody></table> <div class="mw-heading mw-heading2"><h2 id="Explanatory_notes">Explanatory notes</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=7" title="Edit section: Explanatory notes"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output 
.reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap"><ol class="references"> <li id="cite_note-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-23">^</a></b></span> <span class="reference-text">This license can be used by individuals and organizations with up to $1 million in annual revenue; for organizations with annual revenue of more than $1 million, the Stability AI Enterprise License is needed. All outputs are retained by users regardless of revenue.</span> </li> <li id="cite_note-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-24">^</a></b></span> <span class="reference-text">This license applies to the schnell model; the dev model uses a non-commercial license, while the pro model is proprietary (only available as an <a href="/wiki/Application_programming_interface" class="mw-redirect" title="Application programming interface">API</a>).</span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=8" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Artificial_intelligence_art" title="Artificial intelligence art">Artificial intelligence art</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Text-to-image_model&action=edit&section=9" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link 
rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239543626"><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-imagen-verge-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-imagen-verge_1-0">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output 
.cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFVincent2022" class="citation news cs1">Vincent, James (May 24, 2022). <a rel="nofollow" class="external text" href="https://www.theverge.com/2022/5/24/23139297/google-imagen-text-to-image-ai-system-examples-paper">"All these images were generated by Google's latest text-to-image AI"</a>. <i>The Verge</i>. Vox Media<span class="reference-accessdate">. Retrieved <span class="nowrap">May 28,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=The+Verge&rft.atitle=All+these+images+were+generated+by+Google%27s+latest+text-to-image+AI&rft.date=2022-05-24&rft.aulast=Vincent&rft.aufirst=James&rft_id=https%3A%2F%2Fwww.theverge.com%2F2022%2F5%2F24%2F23139297%2Fgoogle-imagen-text-to-image-ai-system-examples-paper&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-agnese-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-agnese_2-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFAgneseHerreraTaoZhu2019" class="citation cs2">Agnese, Jorge; Herrera, Jonathan; Tao, Haicheng; Zhu, Xingquan (October 2019), <i>A Survey and Taxonomy of Adversarial Neural Networks for Text-to-Image Synthesis</i>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv 
(identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1910.09399">1910.09399</a></span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=A+Survey+and+Taxonomy+of+Adversarial+Neural+Networks+for+Text-to-Image+Synthesis&rft.date=2019-10&rft_id=info%3Aarxiv%2F1910.09399&rft.aulast=Agnese&rft.aufirst=Jorge&rft.au=Herrera%2C+Jonathan&rft.au=Tao%2C+Haicheng&rft.au=Zhu%2C+Xingquan&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-zhu-2007-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-zhu-2007_3-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFZhuGoldbergEldawyDyer2007" class="citation journal cs1">Zhu, Xiaojin; Goldberg, Andrew B.; Eldawy, Mohamed; Dyer, Charles R.; Strock, Bradley (2007). <a rel="nofollow" class="external text" href="https://www.aaai.org/Papers/AAAI/2007/AAAI07-252.pdf">"A text-to-picture synthesis system for augmenting communication"</a> <span class="cs1-format">(PDF)</span>. <i>AAAI</i>. 
<b>7</b>: 1590–1595.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=AAAI&rft.atitle=A+text-to-picture+synthesis+system+for+augmenting+communication&rft.volume=7&rft.pages=1590-1595&rft.date=2007&rft.aulast=Zhu&rft.aufirst=Xiaojin&rft.au=Goldberg%2C+Andrew+B.&rft.au=Eldawy%2C+Mohamed&rft.au=Dyer%2C+Charles+R.&rft.au=Strock%2C+Bradley&rft_id=https%3A%2F%2Fwww.aaai.org%2FPapers%2FAAAI%2F2007%2FAAAI07-252.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-mansimov-2015-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-mansimov-2015_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-mansimov-2015_4-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-mansimov-2015_4-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFMansimovParisottoLei_BaSalakhutdinov2015" class="citation journal cs1">Mansimov, Elman; Parisotto, Emilio; Lei Ba, Jimmy; Salakhutdinov, Ruslan (November 2015). "Generating Images from Captions with Attention". <i>ICLR</i>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1511.02793">1511.02793</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=ICLR&rft.atitle=Generating+Images+from+Captions+with+Attention&rft.date=2015-11&rft_id=info%3Aarxiv%2F1511.02793&rft.aulast=Mansimov&rft.aufirst=Elman&rft.au=Parisotto%2C+Emilio&rft.au=Lei+Ba%2C+Jimmy&rft.au=Salakhutdinov%2C+Ruslan&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-reed-2016-5"><span class="mw-cite-backlink">^ <a href="#cite_ref-reed-2016_5-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-reed-2016_5-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-reed-2016_5-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFReedAkataLogeswaranSchiele2016" class="citation journal cs1">Reed, Scott; Akata, Zeynep; Logeswaran, Lajanugen; Schiele, Bernt; Lee, Honglak (June 2016). <a rel="nofollow" class="external text" href="http://proceedings.mlr.press/v48/reed16.pdf">"Generative Adversarial Text to Image Synthesis"</a> <span class="cs1-format">(PDF)</span>. <i>International Conference on Machine Learning</i>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1605.05396">1605.05396</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=International+Conference+on+Machine+Learning&rft.atitle=Generative+Adversarial+Text+to+Image+Synthesis&rft.date=2016-06&rft_id=info%3Aarxiv%2F1605.05396&rft.aulast=Reed&rft.aufirst=Scott&rft.au=Akata%2C+Zeynep&rft.au=Logeswaran%2C+Lajanugen&rft.au=Schiele%2C+Bernt&rft.au=Lee%2C+Honglak&rft_id=http%3A%2F%2Fproceedings.mlr.press%2Fv48%2Freed16.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFMansimovParisottoBaSalakhutdinov2016" class="citation journal cs1">Mansimov, Elman; Parisotto, Emilio; Ba, Jimmy Lei; Salakhutdinov, Ruslan (February 29, 2016). "Generating Images from Captions with Attention". <i>International Conference on Learning Representations</i>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1511.02793">1511.02793</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=International+Conference+on+Learning+Representations&rft.atitle=Generating+Images+from+Captions+with+Attention&rft.date=2016-02-29&rft_id=info%3Aarxiv%2F1511.02793&rft.aulast=Mansimov&rft.aufirst=Elman&rft.au=Parisotto%2C+Emilio&rft.au=Ba%2C+Jimmy+Lei&rft.au=Salakhutdinov%2C+Ruslan&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-frolov-7"><span class="mw-cite-backlink">^ <a href="#cite_ref-frolov_7-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-frolov_7-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-frolov_7-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-frolov_7-3"><sup><i><b>d</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFFrolovHinzRaueHees2021" class="citation journal cs1">Frolov, Stanislav; Hinz, Tobias; Raue, Federico; Hees, Jörn; Dengel, Andreas (December 2021). <a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.neunet.2021.07.019">"Adversarial text-to-image synthesis: A review"</a>. <i>Neural Networks</i>. <b>144</b>: 187–209. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2101.09983">2101.09983</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.neunet.2021.07.019">10.1016/j.neunet.2021.07.019</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a> <a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/34500257">34500257</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a> <a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:231698782">231698782</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Neural+Networks&rft.atitle=Adversarial+text-to-image+synthesis%3A+A+review&rft.volume=144&rft.pages=187-209&rft.date=2021-12&rft_id=info%3Aarxiv%2F2101.09983&rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A231698782%23id-name%3DS2CID&rft_id=info%3Apmid%2F34500257&rft_id=info%3Adoi%2F10.1016%2Fj.neunet.2021.07.019&rft.aulast=Frolov&rft.aufirst=Stanislav&rft.au=Hinz%2C+Tobias&rft.au=Raue%2C+Federico&rft.au=Hees%2C+J%C3%B6rn&rft.au=Dengel%2C+Andreas&rft_id=https%3A%2F%2Fdoi.org%2F10.1016%252Fj.neunet.2021.07.019&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRodriguez2022" class="citation web cs1">Rodriguez, Jesus (September 27, 2022). <a rel="nofollow" class="external text" href="https://thesequence.substack.com/p/edge229">"🌅 Edge#229: VQGAN + CLIP"</a>. <i>thesequence.substack.com</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">October 10,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=thesequence.substack.com&rft.atitle=%F0%9F%8C%85+Edge%23229%3A+VQGAN+%2B+CLIP&rft.date=2022-09-27&rft.aulast=Rodriguez&rft.aufirst=Jesus&rft_id=https%3A%2F%2Fthesequence.substack.com%2Fp%2Fedge229&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRodriguez2022" class="citation web cs1">Rodriguez, Jesus (October 4, 2022). <a rel="nofollow" class="external text" href="https://thesequence.substack.com/p/edge231">"🎆🌆 Edge#231: Text-to-Image Synthesis with GANs"</a>. <i>thesequence.substack.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">October 10,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=thesequence.substack.com&rft.atitle=%F0%9F%8E%86%F0%9F%8C%86+Edge%23231%3A+Text-to-Image+Synthesis+with+GANs&rft.date=2022-10-04&rft.aulast=Rodriguez&rft.aufirst=Jesus&rft_id=https%3A%2F%2Fthesequence.substack.com%2Fp%2Fedge231&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-tc-dalle-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-tc-dalle_10-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFColdewey2021" class="citation web cs1">Coldewey, Devin (January 5, 2021). 
<a rel="nofollow" class="external text" href="https://techcrunch.com/2021/01/05/openais-dall-e-creates-plausible-images-of-literally-anything-you-ask-it-to/">"OpenAI's DALL-E creates plausible images of literally anything you ask it to"</a>. <i>TechCrunch</i>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=TechCrunch&rft.atitle=OpenAI%27s+DALL-E+creates+plausible+images+of+literally+anything+you+ask+it+to&rft.date=2021-01-05&rft.aulast=Coldewey&rft.aufirst=Devin&rft_id=https%3A%2F%2Ftechcrunch.com%2F2021%2F01%2F05%2Fopenais-dall-e-creates-plausible-images-of-literally-anything-you-ask-it-to%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-tc-dalle-2-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-tc-dalle-2_11-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFColdewey2022" class="citation web cs1">Coldewey, Devin (April 6, 2022). <a rel="nofollow" class="external text" href="https://techcrunch.com/2022/04/06/openais-new-dall-e-model-draws-anything-but-bigger-better-and-faster-than-before/">"OpenAI's new DALL-E model draws anything — but bigger, better and faster than before"</a>. 
<i>TechCrunch</i>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=TechCrunch&rft.atitle=OpenAI%27s+new+DALL-E+model+draws+anything+%E2%80%94+but+bigger%2C+better+and+faster+than+before&rft.date=2022-04-06&rft.aulast=Coldewey&rft.aufirst=Devin&rft_id=https%3A%2F%2Ftechcrunch.com%2F2022%2F04%2F06%2Fopenais-new-dall-e-model-draws-anything-but-bigger-better-and-faster-than-before%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://stability.ai/blog/stable-diffusion-public-release">"Stable Diffusion Public Release"</a>. <i>Stability.Ai</i><span class="reference-accessdate">. Retrieved <span class="nowrap">October 27,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Stability.Ai&rft.atitle=Stable+Diffusion+Public+Release&rft_id=https%3A%2F%2Fstability.ai%2Fblog%2Fstable-diffusion-public-release&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKumar2022" class="citation web cs1">Kumar, Ashish (October 3, 2022). <a rel="nofollow" class="external text" href="https://www.marktechpost.com/2022/10/03/meta-ai-introduces-make-a-video-an-artificial-intelligence-system-that-generates-videos-from-text/">"Meta AI Introduces 'Make-A-Video': An Artificial Intelligence System That Generates Videos From Text"</a>. 
<i>MarkTechPost</i><span class="reference-accessdate">. Retrieved <span class="nowrap">October 3,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=MarkTechPost&rft.atitle=Meta+AI+Introduces+%27Make-A-Video%27%3A+An+Artificial+Intelligence+System+That+Generates+Videos+From+Text&rft.date=2022-10-03&rft.aulast=Kumar&rft.aufirst=Ashish&rft_id=https%3A%2F%2Fwww.marktechpost.com%2F2022%2F10%2F03%2Fmeta-ai-introduces-make-a-video-an-artificial-intelligence-system-that-generates-videos-from-text%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFEdwards2022" class="citation web cs1">Edwards, Benj (October 5, 2022). <a rel="nofollow" class="external text" href="https://arstechnica.com/information-technology/2022/10/googles-newest-ai-generator-creates-hd-video-from-text-prompts/">"Google's newest AI generator creates HD video from text prompts"</a>. <i>Ars Technica</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">October 25,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Ars+Technica&rft.atitle=Google%27s+newest+AI+generator+creates+HD+video+from+text+prompts&rft.date=2022-10-05&rft.aulast=Edwards&rft.aufirst=Benj&rft_id=https%3A%2F%2Farstechnica.com%2Finformation-technology%2F2022%2F10%2Fgoogles-newest-ai-generator-creates-hd-video-from-text-prompts%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRodriguez2022" class="citation web cs1">Rodriguez, Jesus (October 25, 2022). <a rel="nofollow" class="external text" href="https://thesequence.substack.com/p/edge237">"🎨 Edge#237: What is Midjourney?"</a>. <i>thesequence.substack.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">October 26,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=thesequence.substack.com&rft.atitle=%F0%9F%8E%A8+Edge%23237%3A+What+is+Midjourney%3F&rft.date=2022-10-25&rft.aulast=Rodriguez&rft.aufirst=Jesus&rft_id=https%3A%2F%2Fthesequence.substack.com%2Fp%2Fedge237&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-16">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://phenaki.video/?mc_cid=9fee7eeb9d&mc_eid=6e7303fddd#interactive">"Phenaki"</a>. <i>phenaki.video</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">October 3,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=phenaki.video&rft.atitle=Phenaki&rft_id=https%3A%2F%2Fphenaki.video%2F%3Fmc_cid%3D9fee7eeb9d%26mc_eid%3D6e7303fddd%23interactive&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFEdwards2022" class="citation news cs1">Edwards, Benj (September 9, 2022). <a rel="nofollow" class="external text" href="https://arstechnica.com/information-technology/2022/09/runway-teases-ai-powered-text-to-video-editing-using-written-prompts/">"Runway teases AI-powered text-to-video editing using written prompts"</a>. Ars Technica<span class="reference-accessdate">. Retrieved <span class="nowrap">September 12,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.atitle=Runway+teases+AI-powered+text-to-video+editing+using+written+prompts&rft.date=2022-09-09&rft.aulast=Edwards&rft.aufirst=Benj&rft_id=https%3A%2F%2Farstechnica.com%2Finformation-technology%2F2022%2F09%2Frunway-teases-ai-powered-text-to-video-editing-using-written-prompts%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-imagen-paper-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-imagen-paper_18-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSahariaChanSaxenaLi2022" class="citation arxiv cs1">Saharia, Chitwan; Chan, William; Saxena, Saurabh; Li, Lala; Whang, Jay; Denton, Emily; Kamyar Seyed Ghasemipour, Seyed; Karagol Ayan, Burcu; 
Sara Mahdavi, S.; Gontijo Lopes, Rapha; Salimans, Tim; Ho, Jonathan; J Fleet, David; Norouzi, Mohammad (May 23, 2022). "Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2205.11487">2205.11487</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CV">cs.CV</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Photorealistic+Text-to-Image+Diffusion+Models+with+Deep+Language+Understanding&rft.date=2022-05-23&rft_id=info%3Aarxiv%2F2205.11487&rft.aulast=Saharia&rft.aufirst=Chitwan&rft.au=Chan%2C+William&rft.au=Saxena%2C+Saurabh&rft.au=Li%2C+Lala&rft.au=Whang%2C+Jay&rft.au=Denton%2C+Emily&rft.au=Kamyar+Seyed+Ghasemipour%2C+Seyed&rft.au=Karagol+Ayan%2C+Burcu&rft.au=Sara+Mahdavi%2C+S.&rft.au=Gontijo+Lopes%2C+Rapha&rft.au=Salimans%2C+Tim&rft.au=Ho%2C+Jonathan&rft.au=J+Fleet%2C+David&rft.au=Norouzi%2C+Mohammad&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-Artificial_intelligence_art_computerworld-19"><span class="mw-cite-backlink">^ <a href="#cite_ref-Artificial_intelligence_art_computerworld_19-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Artificial_intelligence_art_computerworld_19-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-Artificial_intelligence_art_computerworld_19-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFElgan2022" class="citation news cs1">Elgan, Mike (November 1, 2022). 
<a rel="nofollow" class="external text" href="https://www.computerworld.com/article/3678172/how-synthetic-media-will-transform-business-forever.html">"How 'synthetic media' will transform business forever"</a>. <i>Computerworld</i><span class="reference-accessdate">. Retrieved <span class="nowrap">November 9,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Computerworld&rft.atitle=How+%27synthetic+media%27+will+transform+business+forever&rft.date=2022-11-01&rft.aulast=Elgan&rft.aufirst=Mike&rft_id=https%3A%2F%2Fwww.computerworld.com%2Farticle%2F3678172%2Fhow-synthetic-media-will-transform-business-forever.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-Artificial_intelligence_art_nytimesRoose-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-Artificial_intelligence_art_nytimesRoose_20-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRoose2022" class="citation news cs1">Roose, Kevin (October 21, 2022). <a rel="nofollow" class="external text" href="https://www.nytimes.com/2022/10/21/technology/ai-generated-art-jobs-dall-e-2.html">"A.I.-Generated Art Is Already Transforming Creative Work"</a>. <i>The New York Times</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">November 16,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=The+New+York+Times&rft.atitle=A.I.-Generated+Art+Is+Already+Transforming+Creative+Work&rft.date=2022-10-21&rft.aulast=Roose&rft.aufirst=Kevin&rft_id=https%3A%2F%2Fwww.nytimes.com%2F2022%2F10%2F21%2Ftechnology%2Fai-generated-art-jobs-dall-e-2.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-Artificial_intelligence_art_CNBCLeswing-21"><span class="mw-cite-backlink">^ <a href="#cite_ref-Artificial_intelligence_art_CNBCLeswing_21-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Artificial_intelligence_art_CNBCLeswing_21-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeswing" class="citation news cs1">Leswing, Kif. <a rel="nofollow" class="external text" href="https://www.cnbc.com/2022/10/08/generative-ai-silicon-valleys-next-trillion-dollar-companies.html">"Why Silicon Valley is so excited about awkward drawings done by artificial intelligence"</a>. <i>CNBC</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">November 16,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=CNBC&rft.atitle=Why+Silicon+Valley+is+so+excited+about+awkward+drawings+done+by+artificial+intelligence&rft.aulast=Leswing&rft.aufirst=Kif&rft_id=https%3A%2F%2Fwww.cnbc.com%2F2022%2F10%2F08%2Fgenerative-ai-silicon-valleys-next-trillion-dollar-companies.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> <li id="cite_note-22"><span class="mw-cite-backlink"><b><a href="#cite_ref-22">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://cloud.google.com/blog/products/ai-machine-learning/imagen-2-on-vertex-ai-is-now-generally-available">"Imagen 2 on Vertex AI is now generally available"</a>. <i>Google Cloud Blog</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">January 2,</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Google+Cloud+Blog&rft.atitle=Imagen+2+on+Vertex+AI+is+now+generally+available&rft_id=https%3A%2F%2Fcloud.google.com%2Fblog%2Fproducts%2Fai-machine-learning%2Fimagen-2-on-vertex-ai-is-now-generally-available&rfr_id=info%3Asid%2Fen.wikipedia.org%3AText-to-image+model" class="Z3988"></span></span> </li> </ol></div></div> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li 
dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output 
tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Artificial_intelligence" style="padding:3px"><table class="nowraplinks hlist mw-collapsible navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar 
li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Artificial_intelligence_(AI)" title="Template:Artificial intelligence (AI)"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Artificial_intelligence_(AI)" class="mw-redirect" title="Template talk:Artificial intelligence (AI)"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Artificial_intelligence_(AI)" title="Special:EditPage/Template:Artificial intelligence (AI)"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Artificial_intelligence" style="font-size:114%;margin:0 4em"><a href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parameter" title="Parameter">Parameter</a> <ul><li><a href="/wiki/Hyperparameter_(machine_learning)" title="Hyperparameter (machine learning)">Hyperparameter</a></li></ul></li> <li><a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">Loss functions</a></li> 
<li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a> <ul><li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Double_descent" title="Double descent">Double descent</a></li> <li><a href="/wiki/Overfitting" title="Overfitting">Overfitting</a></li></ul></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Gradient_descent" title="Gradient descent">Gradient descent</a> <ul><li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a></li> <li><a href="/wiki/Quasi-Newton_method" title="Quasi-Newton method">Quasi-Newton method</a></li> <li><a href="/wiki/Conjugate_gradient_method" title="Conjugate gradient method">Conjugate gradient method</a></li></ul></li> <li><a href="/wiki/Backpropagation" title="Backpropagation">Backpropagation</a></li> <li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention</a></li> <li><a href="/wiki/Convolution" title="Convolution">Convolution</a></li> <li><a href="/wiki/Normalization_(machine_learning)" title="Normalization (machine learning)">Normalization</a> <ul><li><a href="/wiki/Batch_normalization" title="Batch normalization">Batchnorm</a></li></ul></li> <li><a href="/wiki/Activation_function" title="Activation function">Activation</a> <ul><li><a href="/wiki/Softmax_function" title="Softmax function">Softmax</a></li> <li><a href="/wiki/Sigmoid_function" title="Sigmoid function">Sigmoid</a></li> <li><a href="/wiki/Rectifier_(neural_networks)" title="Rectifier (neural networks)">Rectifier</a></li></ul></li> <li><a href="/wiki/Gating_mechanism" title="Gating mechanism">Gating</a></li> <li><a href="/wiki/Weight_initialization" title="Weight initialization">Weight initialization</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a></li> <li><a 
href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">Datasets</a> <ul><li><a href="/wiki/Data_augmentation" title="Data augmentation">Augmentation</a></li></ul></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Imitation_learning" title="Imitation learning">Imitation</a></li></ul></li> <li><a href="/wiki/Diffusion_process" title="Diffusion process">Diffusion</a></li> <li><a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">Latent diffusion model</a></li> <li><a href="/wiki/Autoregressive_model" title="Autoregressive model">Autoregression</a></li> <li><a href="/wiki/Adversarial_machine_learning" title="Adversarial machine learning">Adversary</a></li> <li><a href="/wiki/Retrieval-augmented_generation" title="Retrieval-augmented generation">RAG</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Applications</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> <ul><li><a 
href="/wiki/Prompt_engineering#In-context_learning" title="Prompt engineering">In-context learning</a></li></ul></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Artificial neural network</a> <ul><li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li></ul></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">NMT</a></li></ul></li> <li><a href="/wiki/Artificial_general_intelligence" title="Artificial general intelligence">Artificial general intelligence</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Implementations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Audio–visual</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li> <li><a href="/wiki/Human_image_synthesis" title="Human image synthesis">Human image synthesis</a></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">HWR</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a></li> <li><a href="/wiki/Deep_learning_speech_synthesis" title="Deep learning speech synthesis">Speech synthesis</a> <ul><li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li></ul></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a> <ul><li><a 
href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech recognition system)">Whisper</a></li></ul></li> <li><a href="/wiki/Facial_recognition_system" title="Facial recognition system">Facial recognition</a></li> <li><a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a></li> <li><a class="mw-selflink selflink">Text-to-image models</a> <ul><li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul></li> <li><a href="/wiki/Text-to-video_model" title="Text-to-video model">Text-to-video models</a> <ul><li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/VideoPoet" title="VideoPoet">VideoPoet</a></li></ul></li> <li><a href="/wiki/Music_and_artificial_intelligence" title="Music and artificial intelligence">Music generation</a> <ul><li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li> <li><a href="/wiki/Udio" title="Udio">Udio</a></li></ul></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/T5_(language_model)" title="T5 (language model)">T5</a></li> <li><a 
href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Chinchilla_(language_model)" title="Chinchilla (language model)">Chinchilla AI</a></li> <li><a href="/wiki/PaLM" title="PaLM">PaLM</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a href="/wiki/GPT-2" title="GPT-2">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li></ul></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/Gemini_(language_model)" title="Gemini (language model)">Gemini</a></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/LaMDA" title="LaMDA">LaMDA</a></li> <li><a href="/wiki/BLOOM_(language_model)" title="BLOOM (language model)">BLOOM</a></li> <li><a href="/wiki/Project_Debater" title="Project Debater">Project Debater</a></li> <li><a href="/wiki/IBM_Watson" title="IBM Watson">IBM Watson</a></li> <li><a href="/wiki/IBM_Watsonx" title="IBM Watsonx">IBM Watsonx</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Decisional</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlphaGo" title="AlphaGo">AlphaGo</a></li> <li><a href="/wiki/AlphaZero" title="AlphaZero">AlphaZero</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a 
href="/wiki/Self-driving_car" title="Self-driving car">Self-driving car</a></li> <li><a href="/wiki/MuZero" title="MuZero">MuZero</a></li> <li><a href="/wiki/Action_selection" title="Action selection">Action selection</a> <ul><li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li></ul></li> <li><a href="/wiki/Robot_control" title="Robot control">Robot control</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a></li> <li><a href="/wiki/Warren_Sturgis_McCulloch" title="Warren Sturgis McCulloch">Warren Sturgis McCulloch</a></li> <li><a href="/wiki/Walter_Pitts" title="Walter Pitts">Walter Pitts</a></li> <li><a href="/wiki/John_von_Neumann" title="John von Neumann">John von Neumann</a></li> <li><a href="/wiki/Claude_Shannon" title="Claude Shannon">Claude Shannon</a></li> <li><a href="/wiki/Marvin_Minsky" title="Marvin Minsky">Marvin Minsky</a></li> <li><a href="/wiki/John_McCarthy_(computer_scientist)" title="John McCarthy (computer scientist)">John McCarthy</a></li> <li><a href="/wiki/Nathaniel_Rochester_(computer_scientist)" title="Nathaniel Rochester (computer scientist)">Nathaniel Rochester</a></li> <li><a href="/wiki/Allen_Newell" title="Allen Newell">Allen Newell</a></li> <li><a href="/wiki/Cliff_Shaw" title="Cliff Shaw">Cliff Shaw</a></li> <li><a href="/wiki/Herbert_A._Simon" title="Herbert A. Simon">Herbert A. 
Simon</a></li> <li><a href="/wiki/Oliver_Selfridge" title="Oliver Selfridge">Oliver Selfridge</a></li> <li><a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a></li> <li><a href="/wiki/Bernard_Widrow" title="Bernard Widrow">Bernard Widrow</a></li> <li><a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a></li> <li><a href="/wiki/Seymour_Papert" title="Seymour Papert">Seymour Papert</a></li> <li><a href="/wiki/Seppo_Linnainmaa" title="Seppo Linnainmaa">Seppo Linnainmaa</a></li> <li><a href="/wiki/Paul_Werbos" title="Paul Werbos">Paul Werbos</a></li> <li><a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a></li> <li><a href="/wiki/Yann_LeCun" title="Yann LeCun">Yann LeCun</a></li> <li><a href="/wiki/Geoffrey_Hinton" title="Geoffrey Hinton">Geoffrey Hinton</a></li> <li><a href="/wiki/John_Hopfield" title="John Hopfield">John Hopfield</a></li> <li><a href="/wiki/Yoshua_Bengio" title="Yoshua Bengio">Yoshua Bengio</a></li> <li><a href="/wiki/Lotfi_A._Zadeh" title="Lotfi A. Zadeh">Lotfi A. 
Zadeh</a></li> <li><a href="/wiki/Stephen_Grossberg" title="Stephen Grossberg">Stephen Grossberg</a></li> <li><a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a></li> <li><a href="/wiki/Andrew_Ng" title="Andrew Ng">Andrew Ng</a></li> <li><a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Fei-Fei Li</a></li> <li><a href="/wiki/Alex_Krizhevsky" title="Alex Krizhevsky">Alex Krizhevsky</a></li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a></li> <li><a href="/wiki/Demis_Hassabis" title="Demis Hassabis">Demis Hassabis</a></li> <li><a href="/wiki/David_Silver_(computer_scientist)" title="David Silver (computer scientist)">David Silver</a></li> <li><a href="/wiki/Ian_Goodfellow" title="Ian Goodfellow">Ian Goodfellow</a></li> <li><a href="/wiki/Andrej_Karpathy" title="Andrej Karpathy">Andrej Karpathy</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Organizations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Anthropic" title="Anthropic">Anthropic</a></li> <li><a href="/wiki/EleutherAI" title="EleutherAI">EleutherAI</a></li> <li><a href="/wiki/Google_DeepMind" title="Google DeepMind">Google DeepMind</a></li> <li><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></li> <li><a href="/wiki/Kuaishou" title="Kuaishou">Kuaishou</a></li> <li><a href="/wiki/Meta_AI" title="Meta AI">Meta AI</a></li> <li><a href="/wiki/Mila_(research_institute)" title="Mila (research institute)">Mila</a></li> <li><a href="/wiki/MiniMax_(company)" title="MiniMax (company)">MiniMax</a></li> <li><a href="/wiki/Mistral_AI" title="Mistral AI">Mistral AI</a></li> <li><a href="/wiki/MIT_Computer_Science_and_Artificial_Intelligence_Laboratory" title="MIT Computer Science and Artificial Intelligence Laboratory">MIT CSAIL</a></li> <li><a href="/wiki/OpenAI" 
title="OpenAI">OpenAI</a></li> <li><a href="/wiki/Runway_(company)" title="Runway (company)">Runway</a></li> <li><a href="/wiki/Stability_AI" title="Stability AI">Stability AI</a></li> <li><a href="/wiki/XAI_(company)" title="XAI (company)">xAI</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Architectures</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Neural_Turing_machine" title="Neural Turing machine">Neural Turing machine</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer (ViT)</a></li></ul></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network (RNN)</a></li> <li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">Long short-term memory (LSTM)</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit (GRU)</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">Echo state network</a></li> <li><a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">Multilayer perceptron (MLP)</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network (CNN)</a></li> <li><a href="/wiki/Residual_neural_network" title="Residual neural network">Residual neural network (ResNet)</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Autoencoder" 
title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder (VAE)</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network (GAN)</a></li> <li><a href="/wiki/Graph_neural_network" title="Graph neural network">Graph neural network (GNN)</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Symbol_portal_class.svg" class="mw-file-description" title="Portal"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/16px-Symbol_portal_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/23px-Symbol_portal_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/31px-Symbol_portal_class.svg.png 2x" data-file-width="180" data-file-height="185" /></a></span> Portals <ul><li><a href="/wiki/Portal:Technology" title="Portal:Technology">Technology</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> Categories <ul><li><a href="/wiki/Category:Artificial_neural_networks" title="Category:Artificial neural networks">Artificial neural networks</a></li> <li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine 
learning</a></li></ul></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235"></div><div role="navigation" class="navbox" aria-labelledby="Computer_graphics" style="padding:3px"><table class="nowraplinks mw-collapsible mw-collapsed navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231"><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Computer_graphics" title="Template:Computer graphics"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Computer_graphics" title="Template talk:Computer graphics"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Computer_graphics" title="Special:EditPage/Template:Computer graphics"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Computer_graphics" style="font-size:114%;margin:0 4em"><a href="/wiki/Computer_graphics_(computer_science)" title="Computer graphics (computer science)">Computer graphics</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Vector_graphics" title="Vector graphics">Vector graphics</a></th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Diffusion_curve" title="Diffusion curve">Diffusion curve</a></li> <li><a href="/wiki/Pixel" title="Pixel">Pixel</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/2D_computer_graphics" 
title="2D computer graphics">2D graphics</a></th><td class="navbox-list-with-group navbox-list navbox-even hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alpha_compositing" title="Alpha compositing">Alpha compositing</a></li> <li><a href="/wiki/Layers_(digital_image_editing)" title="Layers (digital image editing)">Layers</a></li> <li><a class="mw-selflink selflink">Text-to-image</a></li></ul> </div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th id="2.5D" scope="row" class="navbox-group" style="width:1%"><a href="/wiki/2.5D" title="2.5D">2.5D</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Isometric_video_game_graphics" title="Isometric video game graphics">Isometric graphics</a></li> <li><a href="/wiki/Mode_7" title="Mode 7">Mode 7</a></li> <li><a href="/wiki/Parallax_scrolling" title="Parallax scrolling">Parallax scrolling</a></li> <li><a href="/wiki/Ray_casting" title="Ray casting">Ray casting</a></li> <li><a href="/wiki/Skybox_(video_games)" title="Skybox (video games)">Skybox</a></li></ul> </div></td></tr></tbody></table><div> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/3D_computer_graphics" title="3D computer graphics">3D graphics</a></th><td class="navbox-list-with-group navbox-list navbox-even hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/3D_projection" title="3D projection">3D projection</a></li> <li><a href="/wiki/3D_rendering" title="3D rendering">3D rendering</a></li> <li>(<a href="/wiki/Image-based_modeling_and_rendering" title="Image-based modeling and rendering">Image-based</a></li> <li><a href="/wiki/Spectral_rendering" title="Spectral rendering">Spectral</a></li> <li><a href="/wiki/Unbiased_rendering" title="Unbiased rendering">Unbiased</a>)</li> <li><a href="/wiki/Aliasing" 
title="Aliasing">Aliasing</a></li> <li><a href="/wiki/Anisotropic_filtering" title="Anisotropic filtering">Anisotropic filtering</a></li> <li><a href="/wiki/Cel_shading" title="Cel shading">Cel shading</a></li> <li><a href="/wiki/Fluid_animation" title="Fluid animation">Fluid animation</a></li> <li><a href="/wiki/Computer_graphics_lighting" title="Computer graphics lighting">Lighting</a> <ul><li><a href="/wiki/Global_illumination" title="Global illumination">Global illumination</a></li></ul></li> <li><a href="/wiki/Hidden-surface_determination" title="Hidden-surface determination">Hidden-surface determination</a></li> <li><a href="/wiki/Polygon_mesh" title="Polygon mesh">Polygon mesh</a></li> <li>(<a href="/wiki/Triangle_mesh" title="Triangle mesh">Triangle mesh</a>)</li> <li><a href="/wiki/Shading" title="Shading">Shading</a> <ul><li><a href="/wiki/Deferred_shading" title="Deferred shading">Deferred</a></li></ul></li> <li><a href="/wiki/Surface_triangulation" title="Surface triangulation">Surface triangulation</a></li> <li><a href="/wiki/Wire-frame_model" title="Wire-frame model">Wire-frame model</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Affine_transformation" title="Affine transformation">Affine transformation</a></li> <li><a href="/wiki/Back-face_culling" title="Back-face culling">Back-face culling</a></li> <li><a href="/wiki/Clipping_(computer_graphics)" title="Clipping (computer graphics)">Clipping</a></li> <li><a href="/wiki/Collision_detection" title="Collision detection">Collision detection</a></li> <li><a href="/wiki/Planar_projection" title="Planar projection">Planar projection</a></li> <li><a href="/wiki/Reflection_(computer_graphics)" title="Reflection (computer graphics)">Reflection</a></li> <li><a href="/wiki/Rendering_(computer_graphics)" 
title="Rendering (computer graphics)">Rendering</a> <ul><li><a href="/wiki/Beam_tracing" title="Beam tracing">Beam tracing</a></li> <li><a href="/wiki/Cone_tracing" title="Cone tracing">Cone tracing</a></li> <li><a href="/wiki/Checkerboard_rendering" title="Checkerboard rendering">Checkerboard rendering</a></li> <li><a href="/wiki/Ray_tracing_(graphics)" title="Ray tracing (graphics)">Ray tracing</a></li> <li><a href="/wiki/Path_tracing" title="Path tracing">Path tracing</a></li> <li><a href="/wiki/Ray_casting" title="Ray casting">Ray casting</a></li> <li><a href="/wiki/Scanline_rendering" title="Scanline rendering">Scanline rendering</a></li></ul></li> <li><a href="/wiki/Rotation_(mathematics)" title="Rotation (mathematics)">Rotation</a></li> <li><a href="/wiki/Scaling_(geometry)" title="Scaling (geometry)">Scaling</a></li> <li><a href="/wiki/Shadow_mapping" title="Shadow mapping">Shadow mapping</a></li> <li><a href="/wiki/Shadow_volume" title="Shadow volume">Shadow volume</a></li> <li><a href="/wiki/Shear_matrix" class="mw-redirect" title="Shear matrix">Shear matrix</a></li> <li><a href="/wiki/Shader" title="Shader">Shader</a></li> <li><a href="/wiki/Texel_(graphics)" title="Texel (graphics)">Texel</a></li> <li><a href="/wiki/Translation_(geometry)" title="Translation (geometry)">Translation</a></li> <li><a href="/wiki/Volume_rendering" title="Volume rendering">Volume rendering</a></li> <li><a href="/wiki/Voxel" title="Voxel">Voxel</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Graphics_software" title="Graphics software">Graphics software</a></th><td class="navbox-list-with-group navbox-list navbox-even hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/List_of_3D_computer_graphics_software" title="List of 3D computer graphics software">3D computer graphics software</a> <ul><li><a href="/wiki/List_of_3D_animation_software" title="List of 3D animation 
software">animation</a></li> <li><a href="/wiki/List_of_3D_modeling_software" title="List of 3D modeling software">modeling</a></li> <li><a href="/wiki/List_of_3D_rendering_software" title="List of 3D rendering software">rendering</a></li></ul></li> <li><a href="/wiki/Raster_graphics_editor" title="Raster graphics editor">Raster graphics editors</a></li> <li><a href="/wiki/Comparison_of_vector_graphics_editors" title="Comparison of vector graphics editors">Vector graphics editors</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Algorithms</th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/List_of_computer_graphics_algorithms" class="mw-redirect" title="List of computer graphics algorithms">List of computer graphics algorithms</a></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐5cd4cd96d5‐vtnqm Cached time: 20241127035524 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.670 seconds Real time usage: 0.855 seconds Preprocessor visited node count: 5671/1000000 Post‐expand include size: 100979/2097152 bytes Template argument size: 2698/2097152 bytes Highest expansion depth: 12/100 Expensive parser function count: 9/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 102265/5000000 bytes Lua time usage: 0.411/10.000 seconds Lua memory usage: 6643038/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 738.036 1 -total 34.90% 257.567 2 Template:Reflist 14.67% 108.266 1 Template:Excerpt 14.01% 103.374 4 Template:Navbox 13.42% 99.045 5 Template:Cite_news 13.25% 97.821 1 Template:Artificial_intelligence_(AI) 10.72% 79.147 1 Template:Short_description 9.76% 72.036 13 Template:R 8.41% 62.085 16 Template:R/ref 6.89% 50.837 1 Template:When? 
--> <!-- Saved in parser cache with key enwiki:pcache:idhash:71701751-0!canonical and timestamp 20241127035524 and revision id 1258324969. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Text-to-image_model&oldid=1258324969">https://en.wikipedia.org/w/index.php?title=Text-to-image_model&oldid=1258324969</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Category</a>: <ul><li><a href="/wiki/Category:Text-to-image_generation" title="Category:Text-to-image generation">Text-to-image generation</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_matches_Wikidata" title="Category:Short description matches Wikidata">Short description matches Wikidata</a></li><li><a href="/wiki/Category:Use_mdy_dates_from_November_2024" title="Category:Use mdy dates from November 2024">Use mdy dates from November 2024</a></li><li><a href="/wiki/Category:All_articles_with_vague_or_ambiguous_time" title="Category:All articles with vague or ambiguous time">All articles with vague or ambiguous time</a></li><li><a href="/wiki/Category:Vague_or_ambiguous_time_from_October_2024" title="Category:Vague or ambiguous time from October 2024">Vague or ambiguous time from October 2024</a></li><li><a 
href="/wiki/Category:Pages_using_multiple_image_with_auto_scaled_images" title="Category:Pages using multiple image with auto scaled images">Pages using multiple image with auto scaled images</a></li><li><a href="/wiki/Category:Articles_with_excerpts" title="Category:Articles with excerpts">Articles with excerpts</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 19 November 2024, at 02:39<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Text-to-image_model&mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" 
height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-c999fddf4-x6gqr","wgBackendResponseTime":125,"wgPageParseReport":{"limitreport":{"cputime":"0.670","walltime":"0.855","ppvisitednodes":{"value":5671,"limit":1000000},"postexpandincludesize":{"value":100979,"limit":2097152},"templateargumentsize":{"value":2698,"limit":2097152},"expansiondepth":{"value":12,"limit":100},"expensivefunctioncount":{"value":9,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":102265,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 738.036 1 -total"," 34.90% 257.567 2 Template:Reflist"," 14.67% 108.266 1 Template:Excerpt"," 14.01% 103.374 4 Template:Navbox"," 13.42% 99.045 5 Template:Cite_news"," 13.25% 97.821 1 Template:Artificial_intelligence_(AI)"," 10.72% 79.147 1 Template:Short_description"," 9.76% 72.036 13 Template:R"," 8.41% 62.085 16 Template:R/ref"," 6.89% 50.837 1 Template:When?"]},"scribunto":{"limitreport-timeusage":{"value":"0.411","limit":"10.000"},"limitreport-memusage":{"value":6643038,"limit":52428800}},"cachereport":{"origin":"mw-web.codfw.main-5cd4cd96d5-vtnqm","timestamp":"20241127035524","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Text-to-image model","url":"https:\/\/en.wikipedia.org\/wiki\/Text-to-image_model","sameAs":"http:\/\/www.wikidata.org\/entity\/Q113940039","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q113940039","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, 
Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2022-09-07T16:50:38Z","dateModified":"2024-11-19T02:39:28Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/3\/36\/Astronaut_Riding_a_Horse_Hiroshige_%28SD3.5%29.webp","headline":"machine learning model"}</script> </body> </html>