CINXE.COM
Imitation learning - Wikipedia
<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Imitation learning - Wikipedia</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"8106d3d9-b780-4411-bbb5-ac1533368685","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Imitation_learning","wgTitle":"Imitation learning","wgCurRevisionId":1257430501,"wgRevisionId":1257430501,"wgArticleId":73429960,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description matches Wikidata","Supervised learning"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Imitation_learning","wgRelevantArticleId":73429960,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{ "levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":10000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q124742024","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};RLSTATE={"ext.globalCssJs.user.styles":"ready", "site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","ext.math.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","mediawiki.page.media","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents", "ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.math.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Imitation learning - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Imitation_learning"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Imitation_learning&action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Imitation_learning"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Imitation_learning rootpage-Imitation_learning skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Imitation+learning" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Imitation+learning" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Imitation+learning" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Imitation+learning" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Approaches" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Approaches"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Approaches</span> </div> </a> <button aria-controls="toc-Approaches-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Approaches subsection</span> </button> <ul id="toc-Approaches-sublist" class="vector-toc-list"> <li id="toc-Behavior_Cloning" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Behavior_Cloning"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.1</span> <span>Behavior Cloning</span> </div> </a> <ul id="toc-Behavior_Cloning-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-DAgger" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#DAgger"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.2</span> <span>DAgger</span> </div> </a> <ul id="toc-DAgger-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Decision_transformer" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Decision_transformer"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.3</span> <span>Decision transformer</span> </div> </a> <ul id="toc-Decision_transformer-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Other_approaches" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Other_approaches"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.4</span> <span>Other approaches</span> </div> </a> <ul id="toc-Other_approaches-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Related_approaches" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Related_approaches"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Related approaches</span> </div> </a> <ul id="toc-Related_approaches-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Further_reading" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Further_reading"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Further reading</span> </div> </a> <ul id="toc-Further_reading-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Imitation learning</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 1 language" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-1" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">1 language</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-sl mw-list-item"><a href="https://sl.wikipedia.org/wiki/U%C4%8Denje_s_posnemanjem_(robotika)" title="Učenje s posnemanjem (robotika) – Slovenian" lang="sl" hreflang="sl" data-title="Učenje s posnemanjem (robotika)" data-language-autonym="Slovenščina" data-language-local-name="Slovenian" class="interlanguage-link-target"><span>Slovenščina</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q124742024#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Imitation_learning" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="new vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Talk:Imitation_learning&action=edit&redlink=1" rel="discussion" class="new" title="Discuss improvements to the content page (page does not exist) [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Imitation_learning"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Imitation_learning&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Imitation_learning&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Imitation_learning"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Imitation_learning&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Imitation_learning&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Imitation_learning" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Imitation_learning" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Imitation_learning&oldid=1257430501" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Imitation_learning&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Imitation_learning&id=1257430501&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FImitation_learning"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FImitation_learning"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Imitation_learning&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Imitation_learning&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q124742024" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Machine learning technique where agents learn from demonstrations</div> <p><b>Imitation learning</b> is a paradigm in <a href="/wiki/Reinforcement_learning" title="Reinforcement learning">reinforcement learning</a>, where an agent learns to perform a task by <a href="/wiki/Supervised_learning" title="Supervised learning">supervised learning</a> from expert demonstrations. It is also called <b>learning from demonstration</b> and <b>apprenticeship learning</b>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> </p><p>It has been applied to underactuated robotics,<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> self-driving cars,<sup id="cite_ref-:0_5-0" class="reference"><a href="#cite_note-:0-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> quadcopter navigation,<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> helicopter aerobatics,<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup> and locomotion.<sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">[</span>11<span class="cite-bracket">]</span></a></sup> </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Approaches">Approaches</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=1" title="Edit section: Approaches"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Expert demonstrations are recordings of an expert performing the desired task, often collected as state-action pairs <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (o_{t}^{*},a_{t}^{*})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msubsup> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo>,</mo> <msubsup> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (o_{t}^{*},a_{t}^{*})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7b779bb9552d6e9175008c89a5dfddd2c4655800" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:7.309ex; height:3.009ex;" alt="{\displaystyle (o_{t}^{*},a_{t}^{*})}"></span>. </p> <div class="mw-heading mw-heading3"><h3 id="Behavior_Cloning">Behavior Cloning</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=2" title="Edit section: Behavior Cloning"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>Behavior Cloning</b> (BC) is the most basic form of imitation learning. Essentially, it uses supervised learning to train a policy <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \pi _{\theta }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>π<!-- π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>θ<!-- θ --></mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \pi _{\theta }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2a9f6266b9302e47ccd37e867f4a364ff52fb8af" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.328ex; height:2.009ex;" alt="{\displaystyle \pi _{\theta }}"></span> such that, given an observation <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle o_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle o_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3cd2cf4bfdabc8ae396ce3fa32aeb871efb3d732" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.954ex; height:2.009ex;" alt="{\displaystyle o_{t}}"></span>, it would output an action distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \pi _{\theta }(\cdot |o_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>π<!-- π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>θ<!-- θ --></mi> </mrow> </msub> <mo stretchy="false">(</mo> <mo>⋅<!-- ⋅ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">|</mo> </mrow> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \pi _{\theta }(\cdot |o_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/39f66f8079df938efcebe34d94cc6ec0a1365091" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.385ex; height:2.843ex;" alt="{\displaystyle \pi _{\theta }(\cdot |o_{t})}"></span> that is approximately the same as the action distribution of the experts.<sup id="cite_ref-:1_12-0" class="reference"><a href="#cite_note-:1-12"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> </p><p>BC is susceptible to <a href="/w/index.php?title=Distribution_shift&action=edit&redlink=1" class="new" title="Distribution shift (page does not exist)">distribution shift</a>. Specifically, if the trained policy differs from the expert policy, it might find itself straying from expert trajectory into observations that would have never occurred in expert trajectories.<sup id="cite_ref-:1_12-1" class="reference"><a href="#cite_note-:1-12"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> </p><p>This was already noted by <a href="/wiki/ALVINN" class="mw-redirect" title="ALVINN">ALVINN</a>, where they trained a neural network to drive a van using human demonstrations. They noticed that because a human driver never strays far from the path, the network would never be trained on what action to take if it ever finds itself straying far from the path.<sup id="cite_ref-:0_5-1" class="reference"><a href="#cite_note-:0-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="DAgger">DAgger</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=3" title="Edit section: DAgger"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>Dagger</b> (<b>D</b>ataset <b>Aggr</b>egation)<sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span class="cite-bracket">[</span>13<span class="cite-bracket">]</span></a></sup> improves on behavior cloning by iteratively training on a dataset of expert demonstrations. In each iteration, the algorithm first collects data by rolling out the learned policy <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \pi _{\theta }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>π<!-- π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>θ<!-- θ --></mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \pi _{\theta }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2a9f6266b9302e47ccd37e867f4a364ff52fb8af" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.328ex; height:2.009ex;" alt="{\displaystyle \pi _{\theta }}"></span>. Then, it queries the expert for the optimal action <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle a_{t}^{*}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msubsup> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle a_{t}^{*}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/dcc8569dc7d75ca90100465216f328fd0cb0194e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:2.284ex; height:2.843ex;" alt="{\displaystyle a_{t}^{*}}"></span> on each observation <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle o_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle o_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3cd2cf4bfdabc8ae396ce3fa32aeb871efb3d732" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.954ex; height:2.009ex;" alt="{\displaystyle o_{t}}"></span> encountered during the rollout. Finally, it aggregates the new data into the dataset<span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle D\leftarrow D\cup \{(o_{1},a_{1}^{*}),(o_{2},a_{2}^{*}),...,(o_{T},a_{T}^{*})\}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>D</mi> <mo stretchy="false">←<!-- ← --></mo> <mi>D</mi> <mo>∪<!-- ∪ --></mo> <mo fence="false" stretchy="false">{</mo> <mo stretchy="false">(</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msubsup> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">)</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msubsup> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">)</mo> <mo>,</mo> <mo>.</mo> <mo>.</mo> <mo>.</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>T</mi> </mrow> </msub> <mo>,</mo> <msubsup> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>T</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">)</mo> <mo fence="false" stretchy="false">}</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle D\leftarrow D\cup \{(o_{1},a_{1}^{*}),(o_{2},a_{2}^{*}),...,(o_{T},a_{T}^{*})\}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e95837cbee3d126d10db20ab0be55101672b670f" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:41.171ex; height:3.009ex;" alt="{\displaystyle D\leftarrow D\cup \{(o_{1},a_{1}^{*}),(o_{2},a_{2}^{*}),...,(o_{T},a_{T}^{*})\}}"></span>and trains a new policy on the aggregated dataset.<sup id="cite_ref-:1_12-2" class="reference"><a href="#cite_note-:1-12"><span class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Decision_transformer">Decision transformer</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=4" title="Edit section: Decision transformer"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Decision_Transformer_architecture.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/2/2e/Decision_Transformer_architecture.png/220px-Decision_Transformer_architecture.png" decoding="async" width="220" height="102" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/2/2e/Decision_Transformer_architecture.png/330px-Decision_Transformer_architecture.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/2/2e/Decision_Transformer_architecture.png/440px-Decision_Transformer_architecture.png 2x" data-file-width="2570" data-file-height="1190" /></a><figcaption>Architecture diagram of the decision transformer.</figcaption></figure> <p>The Decision Transformer approach models reinforcement learning as a sequence modelling problem.<sup id="cite_ref-14" class="reference"><a href="#cite_note-14"><span class="cite-bracket">[</span>14<span class="cite-bracket">]</span></a></sup> Similar to Behavior Cloning, it trains a sequence model, such as a <a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a>, that models rollout sequences <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t},a_{t}),}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t},a_{t}),}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8453021de79bb88bdb80bc7b901a535edb740704" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:39.658ex; height:2.843ex;" alt="{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t},a_{t}),}"></span>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R_{t}=r_{t}+r_{t+1}+\dots +r_{T}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>r</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>r</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <mo>⋯<!-- ⋯ --></mo> <mo>+</mo> <msub> <mi>r</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>T</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R_{t}=r_{t}+r_{t+1}+\dots +r_{T}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f28eaa7469b3d0532fd8ec527447a899b3a8af2b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:25.221ex; height:2.509ex;" alt="{\displaystyle R_{t}=r_{t}+r_{t+1}+\dots +r_{T}}"></span> is the sum of future reward in the rollout. During training time, the sequence model is trained to predict each action <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle a_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle a_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/77fce84b535e9e195e3d30ce5ae09b372d87e2e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.056ex; height:2.009ex;" alt="{\displaystyle a_{t}}"></span>, given the previous rollout as context:<span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>a</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>R</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9a4adce0a9cdeff5c35a1471a1ae536ca2c13f9c" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:35.922ex; height:2.843ex;" alt="{\displaystyle (R_{1},o_{1},a_{1}),(R_{2},o_{2},a_{2}),\dots ,(R_{t},o_{t})}"></span>During inference time, to use the sequence model as an effective controller, it is simply given a very high reward prediction <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4b0bfb3769bf24d80e15374dc37b0441e2616e33" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.764ex; height:2.176ex;" alt="{\displaystyle R}"></span>, and it would generalize by predicting an action that would result in the high reward. This was shown to <a href="/wiki/Neural_scaling_law" title="Neural scaling law">scale predictably</a> to a Transformer with 1 billion parameters that is superhuman on 41 <a href="/wiki/Atari_Games" title="Atari Games">Atari games</a>.<sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Other_approaches">Other approaches</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=5" title="Edit section: Other approaches"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>See <sup id="cite_ref-:2_16-0" class="reference"><a href="#cite_note-:2-16"><span class="cite-bracket">[</span>16<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-17" class="reference"><a href="#cite_note-17"><span class="cite-bracket">[</span>17<span class="cite-bracket">]</span></a></sup> for more examples. </p> <div class="mw-heading mw-heading2"><h2 id="Related_approaches">Related approaches</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=6" title="Edit section: Related approaches"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><a href="/wiki/Inverse_reinforcement_learning" class="mw-redirect" title="Inverse reinforcement learning">Inverse Reinforcement Learning (IRL)</a> learns a reward function that explains the expert's behavior and then uses reinforcement learning to find a policy that maximizes this reward.<sup id="cite_ref-18" class="reference"><a href="#cite_note-18"><span class="cite-bracket">[</span>18<span class="cite-bracket">]</span></a></sup> </p><p>Generative Adversarial Imitation Learning (GAIL) uses <a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">generative adversarial networks</a> (GANs) to match the distribution of agent behavior to the distribution of expert demonstrations.<sup id="cite_ref-19" class="reference"><a href="#cite_note-19"><span class="cite-bracket">[</span>19<span class="cite-bracket">]</span></a></sup> It extends a previous approach using game theory.<sup id="cite_ref-20" class="reference"><a href="#cite_note-20"><span class="cite-bracket">[</span>20<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-:2_16-1" class="reference"><a href="#cite_note-:2-16"><span class="cite-bracket">[</span>16<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=7" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></li> <li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a></li> <li><a href="/wiki/Inverse_reinforcement_learning" class="mw-redirect" title="Inverse reinforcement learning">Inverse reinforcement learning</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=8" title="Edit section: Further reading"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFHusseinGaberElyanJayne2018" class="citation journal cs1">Hussein, Ahmed; Gaber, Mohamed Medhat; Elyan, Eyad; Jayne, Chrisina (2018-03-31). <a rel="nofollow" class="external text" href="https://dl.acm.org/doi/10.1145/3054912">"Imitation Learning: A Survey of Learning Methods"</a>. <i>ACM Computing Surveys</i>. <b>50</b> (2): 1–35. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1145%2F3054912">10.1145/3054912</a>. <a href="/wiki/Hdl_(identifier)" class="mw-redirect" title="Hdl (identifier)">hdl</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://hdl.handle.net/10059%2F2298">10059/2298</a></span>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0360-0300">0360-0300</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=ACM+Computing+Surveys&rft.atitle=Imitation+Learning%3A+A+Survey+of+Learning+Methods&rft.volume=50&rft.issue=2&rft.pages=1-35&rft.date=2018-03-31&rft_id=info%3Ahdl%2F10059%2F2298&rft.issn=0360-0300&rft_id=info%3Adoi%2F10.1145%2F3054912&rft.aulast=Hussein&rft.aufirst=Ahmed&rft.au=Gaber%2C+Mohamed+Medhat&rft.au=Elyan%2C+Eyad&rft.au=Jayne%2C+Chrisina&rft_id=https%3A%2F%2Fdl.acm.org%2Fdoi%2F10.1145%2F3054912&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></li> <li class="mw-empty-elt"></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Imitation_learning&action=edit&section=9" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRussellNorvig2021" class="citation book cs1">Russell, Stuart J.; Norvig, Peter (2021). "22.6 Apprenticeship and Inverse Reinforcement Learning". <i>Artificial intelligence: a modern approach</i>. Pearson series in artificial intelligence (Fourth ed.). Hoboken: Pearson. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-0-13-461099-3" title="Special:BookSources/978-0-13-461099-3"><bdi>978-0-13-461099-3</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=bookitem&rft.atitle=22.6+Apprenticeship+and+Inverse+Reinforcement+Learning&rft.btitle=Artificial+intelligence%3A+a+modern+approach&rft.place=Hoboken&rft.series=Pearson+series+in+artificial+intelligence&rft.edition=Fourth&rft.pub=Pearson&rft.date=2021&rft.isbn=978-0-13-461099-3&rft.aulast=Russell&rft.aufirst=Stuart+J.&rft.au=Norvig%2C+Peter&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSuttonBarto2018" class="citation book cs1">Sutton, Richard S.; Barto, Andrew G. (2018). <i>Reinforcement learning: an introduction</i>. Adaptive computation and machine learning series (Second ed.). Cambridge, Massachusetts: The MIT Press. p. 470. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-0-262-03924-6" title="Special:BookSources/978-0-262-03924-6"><bdi>978-0-262-03924-6</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Reinforcement+learning%3A+an+introduction&rft.place=Cambridge%2C+Massachusetts&rft.series=Adaptive+computation+and+machine+learning+series&rft.pages=470&rft.edition=Second&rft.pub=The+MIT+Press&rft.date=2018&rft.isbn=978-0-262-03924-6&rft.aulast=Sutton&rft.aufirst=Richard+S.&rft.au=Barto%2C+Andrew+G.&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHusseinGaberElyanJayne2017" class="citation journal cs1">Hussein, Ahmed; Gaber, Mohamed Medhat; Elyan, Eyad; Jayne, Chrisina (2017-04-06). <a rel="nofollow" class="external text" href="https://doi.org/10.1145/3054912">"Imitation Learning: A Survey of Learning Methods"</a>. <i>ACM Comput. Surv</i>. <b>50</b> (2): 21:1–21:35. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1145%2F3054912">10.1145/3054912</a>. <a href="/wiki/Hdl_(identifier)" class="mw-redirect" title="Hdl (identifier)">hdl</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://hdl.handle.net/10059%2F2298">10059/2298</a></span>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0360-0300">0360-0300</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=ACM+Comput.+Surv.&rft.atitle=Imitation+Learning%3A+A+Survey+of+Learning+Methods&rft.volume=50&rft.issue=2&rft.pages=21%3A1-21%3A35&rft.date=2017-04-06&rft_id=info%3Ahdl%2F10059%2F2298&rft.issn=0360-0300&rft_id=info%3Adoi%2F10.1145%2F3054912&rft.aulast=Hussein&rft.aufirst=Ahmed&rft.au=Gaber%2C+Mohamed+Medhat&rft.au=Elyan%2C+Eyad&rft.au=Jayne%2C+Chrisina&rft_id=https%3A%2F%2Fdoi.org%2F10.1145%2F3054912&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://underactuated.mit.edu/imitation.html">"Ch. 21 - Imitation Learning"</a>. <i>underactuated.mit.edu</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2024-08-08</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=underactuated.mit.edu&rft.atitle=Ch.+21+-+Imitation+Learning&rft_id=https%3A%2F%2Funderactuated.mit.edu%2Fimitation.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-:0-5"><span class="mw-cite-backlink">^ <a href="#cite_ref-:0_5-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:0_5-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFPomerleau1988" class="citation journal cs1">Pomerleau, Dean A. (1988). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper/1988/hash/812b4ba287f5ee0bc9d43bbf5bbe87fb-Abstract.html">"ALVINN: An Autonomous Land Vehicle in a Neural Network"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>1</b>. Morgan-Kaufmann.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=ALVINN%3A+An+Autonomous+Land+Vehicle+in+a+Neural+Network&rft.volume=1&rft.date=1988&rft.aulast=Pomerleau&rft.aufirst=Dean+A.&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper%2F1988%2Fhash%2F812b4ba287f5ee0bc9d43bbf5bbe87fb-Abstract.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBojarskiDel_TestaDworakowskiFirner2016" class="citation arxiv cs1">Bojarski, Mariusz; Del Testa, Davide; Dworakowski, Daniel; Firner, Bernhard; Flepp, Beat; Goyal, Prasoon; Jackel, Lawrence D.; Monfort, Mathew; Muller, Urs (2016-04-25). "End to End Learning for Self-Driving Cars". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1604.07316v1">1604.07316v1</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CV">cs.CV</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=End+to+End+Learning+for+Self-Driving+Cars&rft.date=2016-04-25&rft_id=info%3Aarxiv%2F1604.07316v1&rft.aulast=Bojarski&rft.aufirst=Mariusz&rft.au=Del+Testa%2C+Davide&rft.au=Dworakowski%2C+Daniel&rft.au=Firner%2C+Bernhard&rft.au=Flepp%2C+Beat&rft.au=Goyal%2C+Prasoon&rft.au=Jackel%2C+Lawrence+D.&rft.au=Monfort%2C+Mathew&rft.au=Muller%2C+Urs&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKiranSobhTalpaertMannion2022" class="citation journal cs1">Kiran, B Ravi; Sobh, Ibrahim; Talpaert, Victor; Mannion, Patrick; Sallab, Ahmad A. Al; Yogamani, Senthil; Perez, Patrick (June 2022). <a rel="nofollow" class="external text" href="https://ieeexplore.ieee.org/document/9351818">"Deep Reinforcement Learning for Autonomous Driving: A Survey"</a>. <i>IEEE Transactions on Intelligent Transportation Systems</i>. <b>23</b> (6): 4909–4926. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2002.00444">2002.00444</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FTITS.2021.3054625">10.1109/TITS.2021.3054625</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1524-9050">1524-9050</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=IEEE+Transactions+on+Intelligent+Transportation+Systems&rft.atitle=Deep+Reinforcement+Learning+for+Autonomous+Driving%3A+A+Survey&rft.volume=23&rft.issue=6&rft.pages=4909-4926&rft.date=2022-06&rft_id=info%3Aarxiv%2F2002.00444&rft.issn=1524-9050&rft_id=info%3Adoi%2F10.1109%2FTITS.2021.3054625&rft.aulast=Kiran&rft.aufirst=B+Ravi&rft.au=Sobh%2C+Ibrahim&rft.au=Talpaert%2C+Victor&rft.au=Mannion%2C+Patrick&rft.au=Sallab%2C+Ahmad+A.+Al&rft.au=Yogamani%2C+Senthil&rft.au=Perez%2C+Patrick&rft_id=https%3A%2F%2Fieeexplore.ieee.org%2Fdocument%2F9351818&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFGiustiGuzziCiresanHe2016" class="citation journal cs1">Giusti, Alessandro; Guzzi, Jerome; Ciresan, Dan C.; He, Fang-Lin; Rodriguez, Juan P.; Fontana, Flavio; Faessler, Matthias; Forster, Christian; Schmidhuber, Jurgen; Caro, Gianni Di; Scaramuzza, Davide; Gambardella, Luca M. (July 2016). <a rel="nofollow" class="external text" href="https://ieeexplore.ieee.org/document/7358076">"A Machine Learning Approach to Visual Perception of Forest Trails for Mobile Robots"</a>. <i>IEEE Robotics and Automation Letters</i>. <b>1</b> (2): 661–667. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FLRA.2015.2509024">10.1109/LRA.2015.2509024</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/2377-3766">2377-3766</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=IEEE+Robotics+and+Automation+Letters&rft.atitle=A+Machine+Learning+Approach+to+Visual+Perception+of+Forest+Trails+for+Mobile+Robots&rft.volume=1&rft.issue=2&rft.pages=661-667&rft.date=2016-07&rft_id=info%3Adoi%2F10.1109%2FLRA.2015.2509024&rft.issn=2377-3766&rft.aulast=Giusti&rft.aufirst=Alessandro&rft.au=Guzzi%2C+Jerome&rft.au=Ciresan%2C+Dan+C.&rft.au=He%2C+Fang-Lin&rft.au=Rodriguez%2C+Juan+P.&rft.au=Fontana%2C+Flavio&rft.au=Faessler%2C+Matthias&rft.au=Forster%2C+Christian&rft.au=Schmidhuber%2C+Jurgen&rft.au=Caro%2C+Gianni+Di&rft.au=Scaramuzza%2C+Davide&rft.au=Gambardella%2C+Luca+M.&rft_id=https%3A%2F%2Fieeexplore.ieee.org%2Fdocument%2F7358076&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://heli.stanford.edu/">"Autonomous Helicopter: Stanford University AI Lab"</a>. <i>heli.stanford.edu</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2024-08-08</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=heli.stanford.edu&rft.atitle=Autonomous+Helicopter%3A+Stanford+University+AI+Lab&rft_id=http%3A%2F%2Fheli.stanford.edu%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFNakanishiMorimotoEndoCheng2004" class="citation journal cs1">Nakanishi, Jun; Morimoto, Jun; Endo, Gen; Cheng, Gordon; Schaal, Stefan; Kawato, Mitsuo (June 2004). <a rel="nofollow" class="external text" href="https://linkinghub.elsevier.com/retrieve/pii/S0921889004000399">"Learning from demonstration and adaptation of biped locomotion"</a>. <i>Robotics and Autonomous Systems</i>. <b>47</b> (2–3): 79–91. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.robot.2004.03.003">10.1016/j.robot.2004.03.003</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Robotics+and+Autonomous+Systems&rft.atitle=Learning+from+demonstration+and+adaptation+of+biped+locomotion&rft.volume=47&rft.issue=2%E2%80%933&rft.pages=79-91&rft.date=2004-06&rft_id=info%3Adoi%2F10.1016%2Fj.robot.2004.03.003&rft.aulast=Nakanishi&rft.aufirst=Jun&rft.au=Morimoto%2C+Jun&rft.au=Endo%2C+Gen&rft.au=Cheng%2C+Gordon&rft.au=Schaal%2C+Stefan&rft.au=Kawato%2C+Mitsuo&rft_id=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0921889004000399&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKalakrishnanBuchliPastorSchaal2009" class="citation book cs1">Kalakrishnan, Mrinal; Buchli, Jonas; Pastor, Peter; Schaal, Stefan (October 2009). <a rel="nofollow" class="external text" href="https://dx.doi.org/10.1109/iros.2009.5354701">"Learning locomotion over rough terrain using terrain templates"</a>. <i>2009 IEEE/RSJ International Conference on Intelligent Robots and Systems</i>. IEEE. pp. 167–172. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2Firos.2009.5354701">10.1109/iros.2009.5354701</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-1-4244-3803-7" title="Special:BookSources/978-1-4244-3803-7"><bdi>978-1-4244-3803-7</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=bookitem&rft.atitle=Learning+locomotion+over+rough+terrain+using+terrain+templates&rft.btitle=2009+IEEE%2FRSJ+International+Conference+on+Intelligent+Robots+and+Systems&rft.pages=167-172&rft.pub=IEEE&rft.date=2009-10&rft_id=info%3Adoi%2F10.1109%2Firos.2009.5354701&rft.isbn=978-1-4244-3803-7&rft.aulast=Kalakrishnan&rft.aufirst=Mrinal&rft.au=Buchli%2C+Jonas&rft.au=Pastor%2C+Peter&rft.au=Schaal%2C+Stefan&rft_id=http%3A%2F%2Fdx.doi.org%2F10.1109%2Firos.2009.5354701&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-:1-12"><span class="mw-cite-backlink">^ <a href="#cite_ref-:1_12-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:1_12-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-:1_12-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://rail.eecs.berkeley.edu/deeprlcourse/deeprlcourse/static/slides/lec-2.pdf">CS 285 at UC Berkeley: Deep Reinforcement Learning. Lecture 2: Supervised Learning of Behaviors</a></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRossGordonBagnell2011" class="citation journal cs1">Ross, Stephane; Gordon, Geoffrey; Bagnell, Drew (2011-06-14). <a rel="nofollow" class="external text" href="https://proceedings.mlr.press/v15/ross11a.html">"A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning"</a>. <i>Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics</i>. JMLR Workshop and Conference Proceedings: 627–635.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Proceedings+of+the+Fourteenth+International+Conference+on+Artificial+Intelligence+and+Statistics&rft.atitle=A+Reduction+of+Imitation+Learning+and+Structured+Prediction+to+No-Regret+Online+Learning&rft.pages=627-635&rft.date=2011-06-14&rft.aulast=Ross&rft.aufirst=Stephane&rft.au=Gordon%2C+Geoffrey&rft.au=Bagnell%2C+Drew&rft_id=https%3A%2F%2Fproceedings.mlr.press%2Fv15%2Fross11a.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFChenLuRajeswaranLee2021" class="citation journal cs1">Chen, Lili; Lu, Kevin; Rajeswaran, Aravind; Lee, Kimin; Grover, Aditya; Laskin, Misha; Abbeel, Pieter; Srinivas, Aravind; Mordatch, Igor (2021). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper/2021/hash/7f489f642a0ddb10272b5c31057f0663-Abstract.html">"Decision Transformer: Reinforcement Learning via Sequence Modeling"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>34</b>. Curran Associates, Inc.: 15084–15097. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2106.01345">2106.01345</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=Decision+Transformer%3A+Reinforcement+Learning+via+Sequence+Modeling&rft.volume=34&rft.pages=15084-15097&rft.date=2021&rft_id=info%3Aarxiv%2F2106.01345&rft.aulast=Chen&rft.aufirst=Lili&rft.au=Lu%2C+Kevin&rft.au=Rajeswaran%2C+Aravind&rft.au=Lee%2C+Kimin&rft.au=Grover%2C+Aditya&rft.au=Laskin%2C+Misha&rft.au=Abbeel%2C+Pieter&rft.au=Srinivas%2C+Aravind&rft.au=Mordatch%2C+Igor&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper%2F2021%2Fhash%2F7f489f642a0ddb10272b5c31057f0663-Abstract.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeeNachumYangLee2022" class="citation cs2">Lee, Kuang-Huei; Nachum, Ofir; Yang, Mengjiao; Lee, Lisa; Freeman, Daniel; Xu, Winnie; Guadarrama, Sergio; Fischer, Ian; Jang, Eric (2022-10-15), <a rel="nofollow" class="external text" href="https://arxiv.org/abs/2205.15241"><i>Multi-Game Decision Transformers</i></a>, <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2205.15241">2205.15241</a></span><span class="reference-accessdate">, retrieved <span class="nowrap">2024-10-22</span></span></cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Multi-Game+Decision+Transformers&rft.date=2022-10-15&rft_id=info%3Aarxiv%2F2205.15241&rft.aulast=Lee&rft.aufirst=Kuang-Huei&rft.au=Nachum%2C+Ofir&rft.au=Yang%2C+Mengjiao&rft.au=Lee%2C+Lisa&rft.au=Freeman%2C+Daniel&rft.au=Xu%2C+Winnie&rft.au=Guadarrama%2C+Sergio&rft.au=Fischer%2C+Ian&rft.au=Jang%2C+Eric&rft_id=https%3A%2F%2Farxiv.org%2Fabs%2F2205.15241&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-:2-16"><span class="mw-cite-backlink">^ <a href="#cite_ref-:2_16-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:2_16-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHesterVecerikPietquinLanctot2017" class="citation arxiv cs1">Hester, Todd; Vecerik, Matej; Pietquin, Olivier; Lanctot, Marc; Schaul, Tom; Piot, Bilal; Horgan, Dan; Quan, John; Sendonaris, Andrew (2017-04-12). "Deep Q-learning from Demonstrations". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1704.03732v4">1704.03732v4</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=preprint&rft.jtitle=arXiv&rft.atitle=Deep+Q-learning+from+Demonstrations&rft.date=2017-04-12&rft_id=info%3Aarxiv%2F1704.03732v4&rft.aulast=Hester&rft.aufirst=Todd&rft.au=Vecerik%2C+Matej&rft.au=Pietquin%2C+Olivier&rft.au=Lanctot%2C+Marc&rft.au=Schaul%2C+Tom&rft.au=Piot%2C+Bilal&rft.au=Horgan%2C+Dan&rft.au=Quan%2C+John&rft.au=Sendonaris%2C+Andrew&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDuanAndrychowiczStadieJonathan_Ho2017" class="citation journal cs1">Duan, Yan; Andrychowicz, Marcin; Stadie, Bradly; Jonathan Ho, OpenAI; Schneider, Jonas; Sutskever, Ilya; Abbeel, Pieter; Zaremba, Wojciech (2017). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper_files/paper/2017/hash/ba3866600c3540f67c1e9575e213be0a-Abstract.html">"One-Shot Imitation Learning"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>30</b>. Curran Associates, Inc.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=One-Shot+Imitation+Learning&rft.volume=30&rft.date=2017&rft.aulast=Duan&rft.aufirst=Yan&rft.au=Andrychowicz%2C+Marcin&rft.au=Stadie%2C+Bradly&rft.au=Jonathan+Ho%2C+OpenAI&rft.au=Schneider%2C+Jonas&rft.au=Sutskever%2C+Ilya&rft.au=Abbeel%2C+Pieter&rft.au=Zaremba%2C+Wojciech&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper_files%2Fpaper%2F2017%2Fhash%2Fba3866600c3540f67c1e9575e213be0a-Abstract.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-18">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFA2000" class="citation journal cs1">A, Ng (2000). <a rel="nofollow" class="external text" href="https://cir.nii.ac.jp/crid/1570854174323564160">"Algorithms for Inverse Reinforcement Learning"</a>. <i>Proc. Of 17th International Conference on Machine Learning, 2000</i>: 663–670.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Proc.+Of+17th+International+Conference+on+Machine+Learning%2C+2000&rft.atitle=Algorithms+for+Inverse+Reinforcement+Learning&rft.pages=663-670&rft.date=2000&rft.aulast=A&rft.aufirst=Ng&rft_id=https%3A%2F%2Fcir.nii.ac.jp%2Fcrid%2F1570854174323564160&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-19">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHoErmon2016" class="citation journal cs1">Ho, Jonathan; Ermon, Stefano (2016). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper_files/paper/2016/hash/cc7e2b878868cbae992d1fb743995d8f-Abstract.html">"Generative Adversarial Imitation Learning"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>29</b>. Curran Associates, Inc. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1606.03476">1606.03476</a></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=Generative+Adversarial+Imitation+Learning&rft.volume=29&rft.date=2016&rft_id=info%3Aarxiv%2F1606.03476&rft.aulast=Ho&rft.aufirst=Jonathan&rft.au=Ermon%2C+Stefano&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper_files%2Fpaper%2F2016%2Fhash%2Fcc7e2b878868cbae992d1fb743995d8f-Abstract.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> <li id="cite_note-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-20">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSyedSchapire2007" class="citation journal cs1">Syed, Umar; Schapire, Robert E (2007). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper_files/paper/2007/hash/ca3ec598002d2e7662e2ef4bdd58278b-Abstract.html">"A Game-Theoretic Approach to Apprenticeship Learning"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>20</b>. Curran Associates, Inc.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Advances+in+Neural+Information+Processing+Systems&rft.atitle=A+Game-Theoretic+Approach+to+Apprenticeship+Learning&rft.volume=20&rft.date=2007&rft.aulast=Syed&rft.aufirst=Umar&rft.au=Schapire%2C+Robert+E&rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper_files%2Fpaper%2F2007%2Fhash%2Fca3ec598002d2e7662e2ef4bdd58278b-Abstract.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AImitation+learning" class="Z3988"></span></span> </li> </ol></div><div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Artificial_intelligence" style="padding:3px"><table class="nowraplinks hlist mw-collapsible {{{state}}} navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Artificial_intelligence_navbox" title="Template:Artificial intelligence navbox"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Artificial_intelligence_navbox" title="Template talk:Artificial intelligence navbox"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Artificial_intelligence_navbox" title="Special:EditPage/Template:Artificial intelligence navbox"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Artificial_intelligence" style="font-size:114%;margin:0 4em"><a href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parameter" title="Parameter">Parameter</a> <ul><li><a href="/wiki/Hyperparameter_(machine_learning)" title="Hyperparameter (machine learning)">Hyperparameter</a></li></ul></li> <li><a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">Loss functions</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a> <ul><li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Double_descent" title="Double descent">Double descent</a></li> <li><a href="/wiki/Overfitting" title="Overfitting">Overfitting</a></li></ul></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Gradient_descent" title="Gradient descent">Gradient descent</a> <ul><li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a></li> <li><a href="/wiki/Quasi-Newton_method" title="Quasi-Newton method">Quasi-Newton method</a></li> <li><a href="/wiki/Conjugate_gradient_method" title="Conjugate gradient method">Conjugate gradient method</a></li></ul></li> <li><a href="/wiki/Backpropagation" title="Backpropagation">Backpropagation</a></li> <li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention</a></li> <li><a href="/wiki/Convolution" title="Convolution">Convolution</a></li> <li><a href="/wiki/Normalization_(machine_learning)" title="Normalization (machine learning)">Normalization</a> <ul><li><a href="/wiki/Batch_normalization" title="Batch normalization">Batchnorm</a></li></ul></li> <li><a href="/wiki/Activation_function" title="Activation function">Activation</a> <ul><li><a href="/wiki/Softmax_function" title="Softmax function">Softmax</a></li> <li><a href="/wiki/Sigmoid_function" title="Sigmoid function">Sigmoid</a></li> <li><a href="/wiki/Rectifier_(neural_networks)" title="Rectifier (neural networks)">Rectifier</a></li></ul></li> <li><a href="/wiki/Gating_mechanism" title="Gating mechanism">Gating</a></li> <li><a href="/wiki/Weight_initialization" title="Weight initialization">Weight initialization</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a></li> <li><a href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">Datasets</a> <ul><li><a href="/wiki/Data_augmentation" title="Data augmentation">Augmentation</a></li></ul></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a class="mw-selflink selflink">Imitation</a></li></ul></li> <li><a href="/wiki/Diffusion_process" title="Diffusion process">Diffusion</a></li> <li><a href="/wiki/Autoregressive_model" title="Autoregressive model">Autoregression</a></li> <li><a href="/wiki/Adversarial_machine_learning" title="Adversarial machine learning">Adversary</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Applications</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> <ul><li><a href="/wiki/Prompt_engineering#In-context_learning" title="Prompt engineering">In-context learning</a></li></ul></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Artificial neural network</a> <ul><li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li></ul></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">NMT</a></li></ul></li> <li><a href="/wiki/Artificial_general_intelligence" title="Artificial general intelligence">Artificial general intelligence</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Implementations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Audio–visual</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li> <li><a href="/wiki/Human_image_synthesis" title="Human image synthesis">Human image synthesis</a></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">HWR</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a></li> <li><a href="/wiki/Deep_learning_speech_synthesis" title="Deep learning speech synthesis">Speech synthesis</a> <ul><li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li></ul></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a></li> <li><a href="/wiki/Facial_recognition_system" title="Facial recognition system">Facial recognition</a></li> <li><a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a></li> <li><a href="/wiki/Text-to-image_model" title="Text-to-image model">Text-to-image models</a> <ul><li><a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">Latent diffusion model</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul></li> <li><a href="/wiki/Text-to-video_model" title="Text-to-video model">Text-to-video models</a> <ul><li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/VideoPoet" title="VideoPoet">VideoPoet</a></li></ul></li> <li><a href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech recognition system)">Whisper</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/T5_(language_model)" title="T5 (language model)">T5</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Chinchilla_(language_model)" title="Chinchilla (language model)">Chinchilla AI</a></li> <li><a href="/wiki/PaLM" title="PaLM">PaLM</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/GPT-2" title="GPT-2">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li></ul></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/Gemini_(language_model)" title="Gemini (language model)">Gemini</a></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/LaMDA" title="LaMDA">LaMDA</a></li> <li><a href="/wiki/BLOOM_(language_model)" title="BLOOM (language model)">BLOOM</a></li> <li><a href="/wiki/Project_Debater" title="Project Debater">Project Debater</a></li> <li><a href="/wiki/IBM_Watson" title="IBM Watson">IBM Watson</a></li> <li><a href="/wiki/IBM_Watsonx" title="IBM Watsonx">IBM Watsonx</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Decisional</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlphaGo" title="AlphaGo">AlphaGo</a></li> <li><a href="/wiki/AlphaZero" title="AlphaZero">AlphaZero</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a href="/wiki/Self-driving_car" title="Self-driving car">Self-driving car</a></li> <li><a href="/wiki/MuZero" title="MuZero">MuZero</a></li> <li><a href="/wiki/Action_selection" title="Action selection">Action selection</a> <ul><li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li></ul></li> <li><a href="/wiki/Robot_control" title="Robot control">Robot control</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a></li> <li><a href="/wiki/Claude_Shannon" title="Claude Shannon">Claude Shannon</a></li> <li><a href="/wiki/Allen_Newell" title="Allen Newell">Allen Newell</a></li> <li><a href="/wiki/Herbert_A._Simon" title="Herbert A. Simon">Herbert A. Simon</a></li> <li><a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a></li> <li><a href="/wiki/Marvin_Minsky" title="Marvin Minsky">Marvin Minsky</a></li> <li><a href="/wiki/John_McCarthy_(computer_scientist)" title="John McCarthy (computer scientist)">John McCarthy</a></li> <li><a href="/wiki/Nathaniel_Rochester_(computer_scientist)" title="Nathaniel Rochester (computer scientist)">Nathaniel Rochester</a></li> <li><a href="/wiki/Seymour_Papert" title="Seymour Papert">Seymour Papert</a></li> <li><a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a></li> <li><a href="/wiki/Bernard_Widrow" title="Bernard Widrow">Bernard Widrow</a></li> <li><a href="/wiki/Paul_Werbos" title="Paul Werbos">Paul Werbos</a></li> <li><a href="/wiki/Yoshua_Bengio" title="Yoshua Bengio">Yoshua Bengio</a></li> <li><a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a></li> <li><a href="/wiki/Ian_Goodfellow" title="Ian Goodfellow">Ian Goodfellow</a></li> <li><a href="/wiki/Stephen_Grossberg" title="Stephen Grossberg">Stephen Grossberg</a></li> <li><a href="/wiki/Demis_Hassabis" title="Demis Hassabis">Demis Hassabis</a></li> <li><a href="/wiki/Geoffrey_Hinton" title="Geoffrey Hinton">Geoffrey Hinton</a></li> <li><a href="/wiki/Yann_LeCun" title="Yann LeCun">Yann LeCun</a></li> <li><a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Fei-Fei Li</a></li> <li><a href="/wiki/Andrew_Ng" title="Andrew Ng">Andrew Ng</a></li> <li><a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a></li> <li><a href="/wiki/David_Silver_(computer_scientist)" title="David Silver (computer scientist)">David Silver</a></li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Organizations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Anthropic" title="Anthropic">Anthropic</a></li> <li><a href="/wiki/EleutherAI" title="EleutherAI">EleutherAI</a></li> <li><a href="/wiki/Google_DeepMind" title="Google DeepMind">Google DeepMind</a></li> <li><a href="/wiki/Hugging_Face" title="Hugging Face">Hugging Face</a></li> <li><a href="/wiki/Kuaishou" title="Kuaishou">Kuaishou</a></li> <li><a href="/wiki/Meta_AI" title="Meta AI">Meta AI</a></li> <li><a href="/wiki/Mila_(research_institute)" title="Mila (research institute)">Mila</a></li> <li><a href="/wiki/MiniMax_(company)" title="MiniMax (company)">MiniMax</a></li> <li><a href="/wiki/Mistral_AI" title="Mistral AI">Mistral AI</a></li> <li><a href="/wiki/MIT_Computer_Science_and_Artificial_Intelligence_Laboratory" title="MIT Computer Science and Artificial Intelligence Laboratory">MIT CSAIL</a></li> <li><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></li> <li><a href="/wiki/Runway_(company)" title="Runway (company)">Runway</a></li> <li><a href="/wiki/XAI_(company)" title="XAI (company)">xAI</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Architectures</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Neural_Turing_machine" title="Neural Turing machine">Neural Turing machine</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer (ViT)</a></li></ul></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network (RNN)</a></li> <li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">Long short-term memory (LSTM)</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit (GRU)</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">Echo state network</a></li> <li><a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">Multilayer perceptron (MLP)</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network (CNN)</a></li> <li><a href="/wiki/Residual_neural_network" title="Residual neural network">Residual neural network (RNN)</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder (VAE)</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network (GAN)</a></li> <li><a href="/wiki/Graph_neural_network" title="Graph neural network">Graph neural network (GNN)</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Symbol_portal_class.svg" class="mw-file-description" title="Portal"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/16px-Symbol_portal_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/23px-Symbol_portal_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/31px-Symbol_portal_class.svg.png 2x" data-file-width="180" data-file-height="185" /></a></span> Portals <ul><li><a href="/wiki/Portal:Technology" title="Portal:Technology">Technology</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> Categories <ul><li><a href="/wiki/Category:Artificial_neural_networks" title="Category:Artificial neural networks">Artificial neural networks</a></li> <li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a></li></ul></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.eqiad.main‐5dc468848‐492vw Cached time: 20241122160704 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.487 seconds Real time usage: 1.177 seconds Preprocessor visited node count: 1600/1000000 Post‐expand include size: 75458/2097152 bytes Template argument size: 750/2097152 bytes Highest expansion depth: 8/100 Expensive parser function count: 1/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 82867/5000000 bytes Lua time usage: 0.333/10.000 seconds Lua memory usage: 5065833/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 901.684 1 -total 33.92% 305.895 12 Template:Cite_journal 33.67% 303.556 1 Template:Artificial_intelligence_(AI) 21.98% 198.208 1 Template:Short_description 20.17% 181.858 2 Template:Navbox 15.77% 142.198 2 Template:Main_other 15.48% 139.544 1 Template:SDcat 4.88% 44.006 2 Template:Pagetype 2.37% 21.393 3 Template:Cite_book 2.13% 19.239 2 Template:Cite_arXiv --> <!-- Saved in parser cache with key enwiki:pcache:73429960:|#|:idhash:canonical and timestamp 20241122160704 and revision id 1257430501. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Imitation_learning&oldid=1257430501">https://en.wikipedia.org/w/index.php?title=Imitation_learning&oldid=1257430501</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Category</a>: <ul><li><a href="/wiki/Category:Supervised_learning" title="Category:Supervised learning">Supervised learning</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_matches_Wikidata" title="Category:Short description matches Wikidata">Short description matches Wikidata</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 14 November 2024, at 21:28<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Imitation_learning&mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-f69cdc8f6-g6cgs","wgBackendResponseTime":156,"wgPageParseReport":{"limitreport":{"cputime":"0.487","walltime":"1.177","ppvisitednodes":{"value":1600,"limit":1000000},"postexpandincludesize":{"value":75458,"limit":2097152},"templateargumentsize":{"value":750,"limit":2097152},"expansiondepth":{"value":8,"limit":100},"expensivefunctioncount":{"value":1,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":82867,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 901.684 1 -total"," 33.92% 305.895 12 Template:Cite_journal"," 33.67% 303.556 1 Template:Artificial_intelligence_(AI)"," 21.98% 198.208 1 Template:Short_description"," 20.17% 181.858 2 Template:Navbox"," 15.77% 142.198 2 Template:Main_other"," 15.48% 139.544 1 Template:SDcat"," 4.88% 44.006 2 Template:Pagetype"," 2.37% 21.393 3 Template:Cite_book"," 2.13% 19.239 2 Template:Cite_arXiv"]},"scribunto":{"limitreport-timeusage":{"value":"0.333","limit":"10.000"},"limitreport-memusage":{"value":5065833,"limit":52428800}},"cachereport":{"origin":"mw-web.eqiad.main-5dc468848-492vw","timestamp":"20241122160704","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Imitation learning","url":"https:\/\/en.wikipedia.org\/wiki\/Imitation_learning","sameAs":"http:\/\/www.wikidata.org\/entity\/Q124742024","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q124742024","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2023-03-31T15:31:40Z","dateModified":"2024-11-14T21:28:10Z","headline":"machine learning technique where agents learn from demonstrations"}</script> </body> </html>