Multi-agent reinforcement learning

From Wikipedia, the free encyclopedia

Sub-field of reinforcement learning

[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Definition" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Definition"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Definition</span> </div> </a> <ul id="toc-Definition-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Cooperation_vs._competition" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Cooperation_vs._competition"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Cooperation vs. competition</span> </div> </a> <button aria-controls="toc-Cooperation_vs._competition-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Cooperation vs. 
competition subsection</span> </button> <ul id="toc-Cooperation_vs._competition-sublist" class="vector-toc-list"> <li id="toc-Pure_competition_settings" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Pure_competition_settings"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Pure competition settings</span> </div> </a> <ul id="toc-Pure_competition_settings-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Pure_cooperation_settings" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Pure_cooperation_settings"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>Pure cooperation settings</span> </div> </a> <ul id="toc-Pure_cooperation_settings-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Mixed-sum_settings" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Mixed-sum_settings"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Mixed-sum settings</span> </div> </a> <ul id="toc-Mixed-sum_settings-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Social_dilemmas" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Social_dilemmas"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Social dilemmas</span> </div> </a> <button aria-controls="toc-Social_dilemmas-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Social dilemmas subsection</span> </button> <ul id="toc-Social_dilemmas-sublist" class="vector-toc-list"> <li id="toc-Sequential_social_dilemmas" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Sequential_social_dilemmas"> <div class="vector-toc-text"> <span class="vector-toc-numb">3.1</span> <span>Sequential social dilemmas</span> </div> </a> <ul id="toc-Sequential_social_dilemmas-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Autocurricula" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Autocurricula"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Autocurricula</span> </div> </a> <ul id="toc-Autocurricula-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Applications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Applications"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Applications</span> </div> </a> <button aria-controls="toc-Applications-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Applications subsection</span> </button> <ul id="toc-Applications-sublist" class="vector-toc-list"> <li id="toc-AI_alignment" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#AI_alignment"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.1</span> <span>AI alignment</span> </div> </a> <ul id="toc-AI_alignment-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Limitations" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Limitations"> <div class="vector-toc-text"> <span 
class="vector-toc-numb">6</span> <span>Limitations</span> </div> </a> <ul id="toc-Limitations-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Further_reading" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Further_reading"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>Further reading</span> </div> </a> <ul id="toc-Further_reading-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Multi-agent reinforcement learning</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
[Figure: Two rival teams of agents face off in a MARL experiment (https://github.com/Farama-Foundation/MAgent#readme)]

Multi-agent reinforcement learning (MARL) is a sub-field of reinforcement learning. It focuses on studying the behavior of multiple learning agents that coexist in a shared environment.[1] Each agent is motivated by its own rewards and takes actions to advance its own interests; in some environments these interests are opposed to those of other agents, resulting in complex group dynamics.

Multi-agent reinforcement learning is closely related to game theory, especially repeated games, as well as to multi-agent systems. Its study combines the pursuit of ideal algorithms that maximize rewards with a more sociological set of concepts.
While research in single-agent reinforcement learning is concerned with finding the algorithm that earns the most reward for one agent, research in multi-agent reinforcement learning evaluates and quantifies social metrics such as cooperation,[2] reciprocity,[3] equity,[4] social influence,[5] language[6] and discrimination.[7]

Definition

Similarly to single-agent reinforcement learning, multi-agent reinforcement learning is modeled as some form of a Markov decision process (MDP).
For example,

- A set $S$ of environment states.
- One set $\mathcal{A}_i$ of actions for each of the agents $i \in I = \{1, \dots, N\}$.
- $P_{\vec{a}}(s, s') = \Pr(s_{t+1} = s' \mid s_t = s,\ \vec{a}_t = \vec{a})$ is the probability of transition (at time $t$) from state $s$ to state $s'$ under joint action $\vec{a}$.
- $\vec{R}_{\vec{a}}(s, s')$ is the immediate joint reward after the transition from $s$ to $s'$ with joint action $\vec{a}$.
In settings with perfect information, such as the games of chess and Go, the MDP is fully observable. In settings with imperfect information, especially in real-world applications like self-driving cars, each agent accesses an observation that carries only part of the information about the current state. In the partially observable setting, the core model is the partially observable stochastic game in the general case, and the decentralized POMDP in the cooperative case.

Cooperation vs. competition

When multiple agents act in a shared environment, their interests might be aligned or misaligned. MARL allows exploring all the different alignments and how they affect the agents' behavior (a toy classifier follows the list):

- In pure competition settings, the agents' rewards are exactly opposite to each other, and therefore they are playing against each other.
- Pure cooperation settings are the other extreme, in which agents get the exact same rewards, and therefore they are playing with each other.
- Mixed-sum settings cover all the games that combine elements of both cooperation and competition.
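One toy way to make this taxonomy concrete is to classify a two-agent game by its table of joint rewards; the function below is a hypothetical illustration, not an established algorithm.

# Toy classifier for the reward structure of a two-agent game (illustrative).
def setting(reward_pairs, eps=1e-9):
    """reward_pairs: list of (r1, r2) joint rewards over all outcomes."""
    if all(abs(r1 + r2) < eps for r1, r2 in reward_pairs):
        return "pure competition (zero-sum)"
    if all(abs(r1 - r2) < eps for r1, r2 in reward_pairs):
        return "pure cooperation (identical rewards)"
    return "mixed-sum"

print(setting([(1, -1), (-1, 1)]))  # pure competition (zero-sum)
print(setting([(2, 2), (0, 0)]))    # pure cooperation (identical rewards)
print(setting([(3, 3), (5, 0)]))    # mixed-sum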
Pure competition settings

When two agents are playing a zero-sum game, they are in pure competition with each other. Many traditional games such as chess and Go fall under this category, as do two-player variants of modern games like StarCraft. Because each agent can only win at the expense of the other agent, many complexities are stripped away. There is no prospect of communication or social dilemmas, as neither agent is incentivized to take actions that benefit its opponent.

The Deep Blue[8] and AlphaGo projects demonstrate how to optimize the performance of agents in pure competition settings.

One complexity that is not stripped away in pure competition settings is autocurricula. As the agents' policy is improved using self-play, multiple layers of learning may occur (a toy self-play loop is sketched below).
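A minimal sketch of self-play in this spirit, assuming a best-response learner on rock-paper-scissors; every name here is illustrative, and best-responding to a frozen snapshot is a crude stand-in for policy improvement. Each "generation" counters the previous one, giving a small stack of successive strategies.

from collections import Counter

ACTIONS = ["rock", "paper", "scissors"]
BEATS = {"rock": "scissors", "paper": "rock", "scissors": "paper"}

def best_response(opponent_counts):
    # Play the action that beats the opponent's most frequent action.
    likely = max(ACTIONS, key=lambda a: opponent_counts[a])
    return next(a for a in ACTIONS if BEATS[a] == likely)

history = Counter({a: 1 for a in ACTIONS})  # uniform prior over own play
for generation in range(5):
    snapshot = history.copy()           # frozen opponent: our own past self
    for _ in range(100):
        move = best_response(snapshot)  # current agent counters the snapshot
        history[move] += 1              # the snapshot stays fixed this round
    print(generation, dict(history))    # strategies cycle generation by generation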
Pure cooperation settings

MARL is used to explore how separate agents with identical interests can communicate and work together. Pure cooperation settings are explored in recreational cooperative games such as Overcooked,[9] as well as in real-world scenarios in robotics.[10]

In pure cooperation settings all the agents get identical rewards, which means that social dilemmas do not occur.

In pure cooperation settings there are often arbitrarily many coordination strategies, and agents converge to specific "conventions" when coordinating with each other. The notion of conventions has been studied in language[11] and also alluded to in more general multi-agent collaborative tasks.[12][13][14][15]

Mixed-sum settings

[Figure: In this mixed-sum setting, each of the four agents is trying to reach a different goal. Each agent's success depends on the other agents clearing its way, even though they are not directly incentivized to assist each other.[16]]

Most real-world scenarios involving multiple agents have elements of both cooperation and competition. For example, when multiple self-driving cars plan their respective paths, each of them has interests that are diverging but not exclusive: each car is minimizing its own travel time, but all cars share an interest in avoiding a traffic collision.[17]

Zero-sum settings with three or more agents often exhibit properties similar to mixed-sum settings, since each pair of agents might have a non-zero utility sum between them.
Mixed-sum settings can be explored using classic matrix games such as prisoner's dilemma, more complex sequential social dilemmas, and recreational games such as Among Us,[18] Diplomacy[19] and StarCraft II.[20][21]

Mixed-sum settings can give rise to communication and social dilemmas.

Social dilemmas

As in game theory, much of the research in MARL revolves around social dilemmas, such as prisoner's dilemma,[22] chicken and stag hunt.[23]

While game theory research might focus on Nash equilibria and what an ideal policy for an agent would be, MARL research focuses on how the agents would learn these ideal policies through a trial-and-error process. The reinforcement learning algorithms used to train the agents maximize each agent's own reward; the conflict between the needs of the agents and the needs of the group is a subject of active research.[24]

Various techniques have been explored to induce cooperation in agents: modifying the environment rules,[25] adding intrinsic rewards,[4] and more.

Sequential social dilemmas

Social dilemmas like prisoner's dilemma, chicken and stag hunt are "matrix games". Each agent takes only one action from a choice of two possible actions, and a simple 2x2 matrix describes the reward each agent will get, given the actions both agents took (an example follows).
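As an illustration, the prisoner's dilemma can be written as such a matrix; the specific payoff numbers below follow one common convention and are not taken from the article.

# The prisoner's dilemma as a 2x2 matrix game (one standard payoff convention).
PAYOFF = {  # PAYOFF[(a1, a2)] -> (reward to agent 1, reward to agent 2)
    ("cooperate", "cooperate"): (3, 3),  # mutual cooperation
    ("cooperate", "defect"):    (0, 5),  # agent 1 is exploited
    ("defect",    "cooperate"): (5, 0),  # agent 2 is exploited
    ("defect",    "defect"):    (1, 1),  # mutual defection
}

a1, a2 = "defect", "cooperate"
r1, r2 = PAYOFF[(a1, a2)]
print(r1, r2)  # 5 0 -- defecting dominates, yet (defect, defect) is worse
               # for both agents than (cooperate, cooperate): the dilemma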
The <a href="/wiki/Reinforcement_learning" title="Reinforcement learning">reinforcement learning</a> algorithms that are used to train the agents are maximizing the agent's own reward; the conflict between the needs of the agents and the needs of the group is a subject of active research.<sup id="cite_ref-24" class="reference"><a href="#cite_note-24"><span class="cite-bracket">&#91;</span>24<span class="cite-bracket">&#93;</span></a></sup> </p><p>Various techniques have been explored in order to induce cooperation in agents: Modifying the environment rules,<sup id="cite_ref-25" class="reference"><a href="#cite_note-25"><span class="cite-bracket">&#91;</span>25<span class="cite-bracket">&#93;</span></a></sup> adding intrinsic rewards,<sup id="cite_ref-Hughes_2018_inequity_4-1" class="reference"><a href="#cite_note-Hughes_2018_inequity-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> and more. </p> <div class="mw-heading mw-heading3"><h3 id="Sequential_social_dilemmas">Sequential social dilemmas</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=7" title="Edit section: Sequential social dilemmas"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Social dilemmas like prisoner's dilemma, chicken and stag hunt are "matrix games". Each agent takes only one action from a choice of two possible actions, and a simple 2x2 matrix is used to describe the reward that each agent will get, given the actions that each agent took. </p><p>In humans and other living creatures, social dilemmas tend to be more complex. Agents take multiple actions over time, and the distinction between cooperating and defecting is not as clear cut as in matrix games. The concept of a <b>sequential social dilemma (SSD)</b> was introduced in 2017<sup id="cite_ref-26" class="reference"><a href="#cite_note-26"><span class="cite-bracket">&#91;</span>26<span class="cite-bracket">&#93;</span></a></sup> as an attempt to model that complexity. There is ongoing research into defining different kinds of SSDs and showing cooperative behavior in the agents that act in them.<sup id="cite_ref-27" class="reference"><a href="#cite_note-27"><span class="cite-bracket">&#91;</span>27<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Autocurricula">Autocurricula</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=8" title="Edit section: Autocurricula"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>An autocurriculum<sup id="cite_ref-28" class="reference"><a href="#cite_note-28"><span class="cite-bracket">&#91;</span>28<span class="cite-bracket">&#93;</span></a></sup> (plural: autocurricula) is a reinforcement learning concept that's salient in multi-agent experiments. As agents improve their performance, they change their environment; this change in the environment affects themselves and the other agents. The feedback loop results in several distinct phases of learning, each depending on the previous one. The stacked layers of learning are called an autocurriculum. 
Autocurricula are especially apparent in adversarial settings,[29] where each group of agents is racing to counter the current strategy of the opposing group.

The Hide and Seek game (https://www.youtube.com/watch?v=kopoLzvh5jY) is an accessible example of an autocurriculum occurring in an adversarial setting. In this experiment, a team of seekers competes against a team of hiders. Whenever one of the teams learns a new strategy, the opposing team adapts its strategy to give the best possible counter. When the hiders learn to use boxes to build a shelter, the seekers respond by learning to use a ramp to break into that shelter. The hiders respond by locking the ramps, making them unavailable to the seekers. The seekers then respond by "box surfing", exploiting a glitch in the game to penetrate the shelter. Each "level" of learning is an emergent phenomenon, with the previous level as its premise, resulting in a stack of behaviors, each dependent on its predecessor.

Autocurricula in reinforcement learning experiments are compared to the stages of the evolution of life on Earth and the development of human culture. A major stage in evolution happened 2-3 billion years ago, when photosynthesizing life forms started to produce massive amounts of oxygen, changing the balance of gases in the atmosphere.[30] In the next stages of evolution, oxygen-breathing life forms evolved, eventually leading to land mammals and human beings. These later stages could only happen after the photosynthesis stage made oxygen widely available.
Similarly, human culture could not have gone through the Industrial Revolution in the 18th century without the resources and insights gained from the agricultural revolution at around 10,000 BC.[31]

Applications

Multi-agent reinforcement learning has been applied to a variety of use cases in science and industry:

- Broadband cellular networks such as 5G[32]
- Content caching[32]
- Packet routing[32]
- Computer vision[33]
- Network security[32]
- Transmit power control[32]
- Computation offloading[32]
- Language evolution research[34]
class="reference"><a href="#cite_note-34"><span class="cite-bracket">&#91;</span>34<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Global_health" title="Global health">Global health</a><sup id="cite_ref-35" class="reference"><a href="#cite_note-35"><span class="cite-bracket">&#91;</span>35<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Integrated_circuit_design" title="Integrated circuit design">Integrated circuit design</a><sup id="cite_ref-36" class="reference"><a href="#cite_note-36"><span class="cite-bracket">&#91;</span>36<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Internet_of_Things" class="mw-redirect" title="Internet of Things">Internet of Things</a><sup id="cite_ref-Li_2022_32-6" class="reference"><a href="#cite_note-Li_2022-32"><span class="cite-bracket">&#91;</span>32<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Microgrid" title="Microgrid">Microgrid</a> <a href="/wiki/Energy_management" title="Energy management">energy management</a><sup id="cite_ref-37" class="reference"><a href="#cite_note-37"><span class="cite-bracket">&#91;</span>37<span class="cite-bracket">&#93;</span></a></sup></li> <li>Multi-camera control<sup id="cite_ref-38" class="reference"><a href="#cite_note-38"><span class="cite-bracket">&#91;</span>38<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Self-driving_car" title="Self-driving car">Autonomous vehicles</a><sup id="cite_ref-39" class="reference"><a href="#cite_note-39"><span class="cite-bracket">&#91;</span>39<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Sports_analytics" title="Sports analytics">Sports analytics</a><sup id="cite_ref-40" class="reference"><a href="#cite_note-40"><span class="cite-bracket">&#91;</span>40<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Traffic_control" class="mw-redirect" title="Traffic control">Traffic control</a><sup id="cite_ref-41" class="reference"><a href="#cite_note-41"><span class="cite-bracket">&#91;</span>41<span class="cite-bracket">&#93;</span></a></sup> (<a href="/wiki/Ramp_metering" class="mw-redirect" title="Ramp metering">Ramp metering</a><sup id="cite_ref-42" class="reference"><a href="#cite_note-42"><span class="cite-bracket">&#91;</span>42<span class="cite-bracket">&#93;</span></a></sup>)</li> <li><a href="/wiki/Unmanned_aerial_vehicles" class="mw-redirect" title="Unmanned aerial vehicles">Unmanned aerial vehicles</a><sup id="cite_ref-43" class="reference"><a href="#cite_note-43"><span class="cite-bracket">&#91;</span>43<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-Li_2022_32-7" class="reference"><a href="#cite_note-Li_2022-32"><span class="cite-bracket">&#91;</span>32<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Wildlife_conservation" title="Wildlife conservation">Wildlife conservation</a><sup id="cite_ref-44" class="reference"><a href="#cite_note-44"><span class="cite-bracket">&#91;</span>44<span class="cite-bracket">&#93;</span></a></sup></li></ul></div> <div class="mw-heading mw-heading3"><h3 id="AI_alignment">AI alignment</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=10" title="Edit section: AI alignment"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Multi-agent reinforcement learning has been used in research 
into <a href="/wiki/AI_alignment" title="AI alignment">AI alignment</a>. The relationship between the different agents in a MARL setting can be compared to the relationship between a human and an AI agent. Research efforts in the intersection of these two fields attempt to simulate possible conflicts between a human's intentions and an AI agent's actions, and then explore which variables could be changed to prevent these conflicts.<sup id="cite_ref-45" class="reference"><a href="#cite_note-45"><span class="cite-bracket">&#91;</span>45<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-46" class="reference"><a href="#cite_note-46"><span class="cite-bracket">&#91;</span>46<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Limitations">Limitations</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=11" title="Edit section: Limitations"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>There are some inherent difficulties about multi-agent <a href="/wiki/Deep_reinforcement_learning" title="Deep reinforcement learning">deep reinforcement learning</a>.<sup id="cite_ref-47" class="reference"><a href="#cite_note-47"><span class="cite-bracket">&#91;</span>47<span class="cite-bracket">&#93;</span></a></sup> The environment is not stationary anymore, thus the <a href="/wiki/Markov_property" title="Markov property">Markov property</a> is violated: transitions and rewards do not only depend on the current state of an agent. </p> <div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=12" title="Edit section: Further reading"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1235681985">.mw-parser-output .side-box{margin:4px 0;box-sizing:border-box;border:1px solid #aaa;font-size:88%;line-height:1.25em;background-color:var(--background-color-interactive-subtle,#f8f9fa);display:flow-root}.mw-parser-output .side-box-abovebelow,.mw-parser-output .side-box-text{padding:0.25em 0.9em}.mw-parser-output .side-box-image{padding:2px 0 2px 0.9em;text-align:center}.mw-parser-output .side-box-imageright{padding:2px 0.9em 2px 0;text-align:center}@media(min-width:500px){.mw-parser-output .side-box-flex{display:flex;align-items:center}.mw-parser-output .side-box-text{flex:1;min-width:0}}@media(min-width:720px){.mw-parser-output .side-box{width:238px}.mw-parser-output .side-box-right{clear:right;float:right;margin-left:1em}.mw-parser-output .side-box-left{margin-right:1em}}</style><div class="side-box metadata side-box-right"><style data-mw-deduplicate="TemplateStyles:r1126788409">.mw-parser-output .plainlist ol,.mw-parser-output .plainlist ul{line-height:inherit;list-style:none;margin:0;padding:0}.mw-parser-output .plainlist ol li,.mw-parser-output .plainlist ul li{margin-bottom:0}</style> <div class="side-box-flex"> <div class="side-box-image"><span class="noviewer" typeof="mw:File"><span><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/32/Scholia_logo.svg/40px-Scholia_logo.svg.png" decoding="async" width="40" height="39" class="mw-file-element" 
srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/32/Scholia_logo.svg/60px-Scholia_logo.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/32/Scholia_logo.svg/80px-Scholia_logo.svg.png 2x" data-file-width="107" data-file-height="104" /></span></span></div> <div class="side-box-text plainlist"><a href="https://www.wikidata.org/wiki/Wikidata:Scholia" class="extiw" title="d:Wikidata:Scholia">Scholia</a> has a <i>topic</i> profile for <i><b><a href="https://iw.toolforge.org/scholia/topic/Q85786957" class="extiw" title="toolforge:scholia/topic/Q85786957">Multi-agent reinforcement learning</a></b></i>.</div></div> </div> <ul><li>Stefano V. Albrecht, Filippos Christianos, Lukas Schäfer. <i>Multi-Agent Reinforcement Learning: Foundations and Modern Approaches</i>. MIT Press, 2024. <a rel="nofollow" class="external text" href="https://www.marl-book.com/">https://www.marl-book.com</a></li> <li>Kaiqing Zhang, Zhuoran Yang, Tamer Basar. <i>Multi-agent reinforcement learning: A selective overview of theories and algorithms</i>. Studies in Systems, Decision and Control, Handbook on RL and Control, 2021. <a rel="nofollow" class="external autonumber" href="https://link.springer.com/chapter/10.1007/978-3-030-60990-0_12">[1]</a></li> <li><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFYangWang2020" class="citation arxiv cs1">Yang, Yaodong; Wang, Jun (2020). 
"An Overview of Multi-Agent Reinforcement Learning from Game Theoretical Perspective". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2011.00583">2011.00583</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.MA">cs.MA</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=An+Overview+of+Multi-Agent+Reinforcement+Learning+from+Game+Theoretical+Perspective&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F2011.00583&amp;rft.aulast=Yang&amp;rft.aufirst=Yaodong&amp;rft.au=Wang%2C+Jun&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Multi-agent_reinforcement_learning&amp;action=edit&amp;section=13" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text">Stefano V. Albrecht, Filippos Christianos, Lukas Schäfer. <i>Multi-Agent Reinforcement Learning: Foundations and Modern Approaches.</i> MIT Press, 2024. <a rel="nofollow" class="external free" href="https://www.marl-book.com/">https://www.marl-book.com/</a></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLoweWu2020" class="citation arxiv cs1">Lowe, Ryan; Wu, Yi (2020). "Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1706.02275v4">1706.02275v4</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Multi-Agent+Actor-Critic+for+Mixed+Cooperative-Competitive+Environments&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F1706.02275v4&amp;rft.aulast=Lowe&amp;rft.aufirst=Ryan&amp;rft.au=Wu%2C+Yi&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBaker2020" class="citation conference cs1">Baker, Bowen (2020). "Emergent Reciprocity and Team Formation from Randomized Uncertain Social Preferences". <i>NeurIPS 2020 proceedings</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2011.05373">2011.05373</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Emergent+Reciprocity+and+Team+Formation+from+Randomized+Uncertain+Social+Preferences&amp;rft.btitle=NeurIPS+2020+proceedings&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F2011.05373&amp;rft.aulast=Baker&amp;rft.aufirst=Bowen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-Hughes_2018_inequity-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-Hughes_2018_inequity_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Hughes_2018_inequity_4-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHughesLeibo2018" class="citation conference cs1">Hughes, Edward; Leibo, Joel Z.; et&#160;al. (2018). "Inequity aversion improves cooperation in intertemporal social dilemmas". <i>NeurIPS 2018 proceedings</i>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1803.08884">1803.08884</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Inequity+aversion+improves+cooperation+in+intertemporal+social+dilemmas&amp;rft.btitle=NeurIPS+2018+proceedings&amp;rft.date=2018&amp;rft_id=info%3Aarxiv%2F1803.08884&amp;rft.aulast=Hughes&amp;rft.aufirst=Edward&amp;rft.au=Leibo%2C+Joel+Z.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFJaquesLazaridouHughes2019" class="citation conference cs1">Jaques, Natasha; Lazaridou, Angeliki; Hughes, Edward; et&#160;al. (2019). "Social Influence as Intrinsic Motivation for Multi-Agent Deep Reinforcement Learning". <i>Proceedings of the 35th International Conference on Machine Learning</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1810.08647">1810.08647</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Social+Influence+as+Intrinsic+Motivation+for+Multi-Agent+Deep+Reinforcement+Learning&amp;rft.btitle=Proceedings+of+the+35th+International+Conference+on+Machine+Learning&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1810.08647&amp;rft.aulast=Jaques&amp;rft.aufirst=Natasha&amp;rft.au=Lazaridou%2C+Angeliki&amp;rft.au=Hughes%2C+Edward&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLazaridou2017" class="citation conference cs1">Lazaridou, Angeliki (2017). "Multi-Agent Cooperation and The Emergence of (Natural) Language". <i>ICLR 2017</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1612.07182">1612.07182</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Multi-Agent+Cooperation+and+The+Emergence+of+%28Natural%29+Language&amp;rft.btitle=ICLR+2017&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1612.07182&amp;rft.aulast=Lazaridou&amp;rft.aufirst=Angeliki&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDuéñez-Guzmán2021" class="citation arxiv cs1">Duéñez-Guzmán, Edgar; et&#160;al. (2021). "Statistical discrimination in learning agents". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2110.11404v1">2110.11404v1</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Statistical+discrimination+in+learning+agents&amp;rft.date=2021&amp;rft_id=info%3Aarxiv%2F2110.11404v1&amp;rft.aulast=Du%C3%A9%C3%B1ez-Guzm%C3%A1n&amp;rft.aufirst=Edgar&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFCampbellHoaneHsu2002" class="citation journal cs1">Campbell, Murray; Hoane, A. Joseph Jr.; Hsu, Feng-hsiung (2002). "Deep Blue". <i>Artificial Intelligence</i>. <b>134</b> (1–2). Elsevier: 57–83. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2FS0004-3702%2801%2900129-1">10.1016/S0004-3702(01)00129-1</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0004-3702">0004-3702</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Artificial+Intelligence&amp;rft.atitle=Deep+Blue&amp;rft.volume=134&amp;rft.issue=1%E2%80%932&amp;rft.pages=57-83&amp;rft.date=2002&amp;rft_id=info%3Adoi%2F10.1016%2FS0004-3702%2801%2900129-1&amp;rft.issn=0004-3702&amp;rft.aulast=Campbell&amp;rft.aufirst=Murray&amp;rft.au=Hoane%2C+A.+Joseph+Jr.&amp;rft.au=Hsu%2C+Feng-hsiung&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFCarroll2019" class="citation arxiv cs1">Carroll, Micah; et&#160;al. (2019). "On the Utility of Learning about Humans for Human-AI Coordination". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1910.05789">1910.05789</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=On+the+Utility+of+Learning+about+Humans+for+Human-AI+Coordination&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1910.05789&amp;rft.aulast=Carroll&amp;rft.aufirst=Micah&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFXieLoseyTolsmaFinn2020" class="citation conference cs1">Xie, Annie; Losey, Dylan; Tolsma, Ryan; <a href="/wiki/Chelsea_Finn" title="Chelsea Finn">Finn, Chelsea</a>; Sadigh, Dorsa (November 2020). <a rel="nofollow" class="external text" href="https://iliad.stanford.edu/pdfs/publications/xie2020learning.pdf"><i>Learning Latent Representations to Influence Multi-Agent Interaction</i></a> <span class="cs1-format">(PDF)</span>. CoRL.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Learning+Latent+Representations+to+Influence+Multi-Agent+Interaction&amp;rft.date=2020-11&amp;rft.aulast=Xie&amp;rft.aufirst=Annie&amp;rft.au=Losey%2C+Dylan&amp;rft.au=Tolsma%2C+Ryan&amp;rft.au=Finn%2C+Chelsea&amp;rft.au=Sadigh%2C+Dorsa&amp;rft_id=https%3A%2F%2Filiad.stanford.edu%2Fpdfs%2Fpublications%2Fxie2020learning.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFClarkWilkes-Gibbs1986" class="citation journal cs1">Clark, Herbert; Wilkes-Gibbs, Deanna (February 1986). "Referring as a collaborative process". <i>Cognition</i>. <b>22</b> (1): 1–39. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2F0010-0277%2886%2990010-7">10.1016/0010-0277(86)90010-7</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/3709088">3709088</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:204981390">204981390</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Cognition&amp;rft.atitle=Referring+as+a+collaborative+process&amp;rft.volume=22&amp;rft.issue=1&amp;rft.pages=1-39&amp;rft.date=1986-02&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A204981390%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F3709088&amp;rft_id=info%3Adoi%2F10.1016%2F0010-0277%2886%2990010-7&amp;rft.aulast=Clark&amp;rft.aufirst=Herbert&amp;rft.au=Wilkes-Gibbs%2C+Deanna&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBoutilier1996" class="citation journal cs1">Boutilier, Craig (17 March 1996). <a rel="nofollow" class="external text" href="https://dl.acm.org/doi/10.5555/1029693.1029710">"Planning, learning and coordination in multiagent decision processes"</a>. <i>Proceedings of the 6th Conference on Theoretical Aspects of Rationality and Knowledge</i>: 195–210.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+6th+Conference+on+Theoretical+Aspects+of+Rationality+and+Knowledge&amp;rft.atitle=Planning%2C+learning+and+coordination+in+multiagent+decision+processes&amp;rft.pages=195-210&amp;rft.date=1996-03-17&amp;rft.aulast=Boutilier&amp;rft.aufirst=Craig&amp;rft_id=https%3A%2F%2Fdl.acm.org%2Fdoi%2F10.5555%2F1029693.1029710&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFStoneKaminkaKrausRosenschein2010" class="citation conference cs1">Stone, Peter; Kaminka, Gal A.; Kraus, Sarit; Rosenschein, Jeffrey S. (July 2010). <a rel="nofollow" class="external text" href="https://www.cs.utexas.edu/~pstone/Papers/bib2html/b2hd-AAAI10-adhoc.html"><i>Ad Hoc Autonomous Agent Teams: Collaboration without Pre-Coordination</i></a>. AAAI 11.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Ad+Hoc+Autonomous+Agent+Teams%3A+Collaboration+without+Pre-Coordination&amp;rft.date=2010-07&amp;rft.aulast=Stone&amp;rft.aufirst=Peter&amp;rft.au=Kaminka%2C+Gal+A.&amp;rft.au=Kraus%2C+Sarit&amp;rft.au=Rosenschein%2C+Jeffrey+S.&amp;rft_id=https%3A%2F%2Fwww.cs.utexas.edu%2F~pstone%2FPapers%2Fbib2html%2Fb2hd-AAAI10-adhoc.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFFoersterSongHughesBurch" class="citation conference cs1">Foerster, Jakob N.; Song, H. 
Francis; Hughes, Edward; Burch, Neil; Dunning, Iain; Whiteson, Shimon; Botvinick, Matthew M; Bowling, Michael H. <i>Bayesian action decoder for deep multi-agent reinforcement learning</i>. ICML 2019. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1811.01458">1811.01458</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Bayesian+action+decoder+for+deep+multi-agent+reinforcement+learning&amp;rft_id=info%3Aarxiv%2F1811.01458&amp;rft.aulast=Foerster&amp;rft.aufirst=Jakob+N.&amp;rft.au=Song%2C+H.+Francis&amp;rft.au=Hughes%2C+Edward&amp;rft.au=Burch%2C+Neil&amp;rft.au=Dunning%2C+Iain&amp;rft.au=Whiteson%2C+Shimon&amp;rft.au=Botvinick%2C+Matthew+M&amp;rft.au=Bowling%2C+Michael+H.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFShihSawhneyKondicErmon" class="citation conference cs1">Shih, Andy; Sawhney, Arjun; Kondic, Jovana; Ermon, Stefano; Sadigh, Dorsa. <i>On the Critical Role of Conventions in Adaptive Human-AI Collaboration</i>. ICLR 2021. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2104.02871">2104.02871</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=On+the+Critical+Role+of+Conventions+in+Adaptive+Human-AI+Collaboration&amp;rft_id=info%3Aarxiv%2F2104.02871&amp;rft.aulast=Shih&amp;rft.aufirst=Andy&amp;rft.au=Sawhney%2C+Arjun&amp;rft.au=Kondic%2C+Jovana&amp;rft.au=Ermon%2C+Stefano&amp;rft.au=Sadigh%2C+Dorsa&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-16">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBettiniKortvelesyBlumenkampProrok2022" class="citation journal cs1">Bettini, Matteo; Kortvelesy, Ryan; Blumenkamp, Jan; Prorok, Amanda (2022). "VMAS: A Vectorized Multi-Agent Simulator for Collective Robot Learning". <i>The 16th International Symposium on Distributed Autonomous Robotic Systems</i>. Springer. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2207.03530">2207.03530</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=The+16th+International+Symposium+on+Distributed+Autonomous+Robotic+Systems&amp;rft.atitle=VMAS%3A+A+Vectorized+Multi-Agent+Simulator+for+Collective+Robot+Learning&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2207.03530&amp;rft.aulast=Bettini&amp;rft.aufirst=Matteo&amp;rft.au=Kortvelesy%2C+Ryan&amp;rft.au=Blumenkamp%2C+Jan&amp;rft.au=Prorok%2C+Amanda&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFShalev-ShwartzShammahShashua2016" class="citation arxiv cs1">Shalev-Shwartz, Shai; Shammah, Shaked; Shashua, Amnon (2016). "Safe, Multi-Agent, Reinforcement Learning for Autonomous Driving". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1610.03295">1610.03295</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Safe%2C+Multi-Agent%2C+Reinforcement+Learning+for+Autonomous+Driving&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1610.03295&amp;rft.aulast=Shalev-Shwartz&amp;rft.aufirst=Shai&amp;rft.au=Shammah%2C+Shaked&amp;rft.au=Shashua%2C+Amnon&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-18">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKopparapuDuéñez-GuzmánMatyasVezhnevets2022" class="citation arxiv cs1">Kopparapu, Kavya; Duéñez-Guzmán, Edgar A.; Matyas, Jayd; Vezhnevets, Alexander Sasha; Agapiou, John P.; McKee, Kevin R.; Everett, Richard; Marecki, Janusz; Leibo, Joel Z.; Graepel, Thore (2022). "Hidden Agenda: a Social Deduction Game with Diverse Learned Equilibria". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2201.01816">2201.01816</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Hidden+Agenda%3A+a+Social+Deduction+Game+with+Diverse+Learned+Equilibria&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2201.01816&amp;rft.aulast=Kopparapu&amp;rft.aufirst=Kavya&amp;rft.au=Du%C3%A9%C3%B1ez-Guzm%C3%A1n%2C+Edgar+A.&amp;rft.au=Matyas%2C+Jayd&amp;rft.au=Vezhnevets%2C+Alexander+Sasha&amp;rft.au=Agapiou%2C+John+P.&amp;rft.au=McKee%2C+Kevin+R.&amp;rft.au=Everett%2C+Richard&amp;rft.au=Marecki%2C+Janusz&amp;rft.au=Leibo%2C+Joel+Z.&amp;rft.au=Graepel%2C+Thore&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-19">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBakhtinBrown2022" class="citation journal cs1">Bakhtin, Anton; Brown, Noam; et&#160;al. (2022). <a rel="nofollow" class="external text" href="https://www.science.org/doi/abs/10.1126/science.ade9097">"Human-level play in the game of Diplomacy by combining language models with strategic reasoning"</a>. <i>Science</i>. <b>378</b> (6624). Springer: 1067–1074. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2022Sci...378.1067M">2022Sci...378.1067M</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1126%2Fscience.ade9097">10.1126/science.ade9097</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/36413172">36413172</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:253759631">253759631</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Science&amp;rft.atitle=Human-level+play+in+the+game+of+Diplomacy+by+combining+language+models+with+strategic+reasoning&amp;rft.volume=378&amp;rft.issue=6624&amp;rft.pages=1067-1074&amp;rft.date=2022&amp;rft_id=info%3Adoi%2F10.1126%2Fscience.ade9097&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A253759631%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F36413172&amp;rft_id=info%3Abibcode%2F2022Sci...378.1067M&amp;rft.aulast=Bakhtin&amp;rft.aufirst=Anton&amp;rft.au=Brown%2C+Noam&amp;rft_id=https%3A%2F%2Fwww.science.org%2Fdoi%2Fabs%2F10.1126%2Fscience.ade9097&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-20">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSamvelyanRashidde_WittFarquhar2019" class="citation arxiv cs1">Samvelyan, Mikayel; Rashid, Tabish; de Witt, Christian Schroeder; Farquhar, Gregory; Nardelli, Nantas; Rudner, Tim G. J.; Hung, Chia-Man; Torr, Philip H. S.; Foerster, Jakob; Whiteson, Shimon (2019). "The StarCraft Multi-Agent Challenge". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1902.04043">1902.04043</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=The+StarCraft+Multi-Agent+Challenge&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1902.04043&amp;rft.aulast=Samvelyan&amp;rft.aufirst=Mikayel&amp;rft.au=Rashid%2C+Tabish&amp;rft.au=de+Witt%2C+Christian+Schroeder&amp;rft.au=Farquhar%2C+Gregory&amp;rft.au=Nardelli%2C+Nantas&amp;rft.au=Rudner%2C+Tim+G.+J.&amp;rft.au=Hung%2C+Chia-Man&amp;rft.au=Torr%2C+Philip+H.+S.&amp;rft.au=Foerster%2C+Jakob&amp;rft.au=Whiteson%2C+Shimon&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-21"><span class="mw-cite-backlink"><b><a href="#cite_ref-21">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFEllisMoallaSamvelyanSun2022" class="citation arxiv cs1">Ellis, Benjamin; Moalla, Skander; Samvelyan, Mikayel; Sun, Mingfei; Mahajan, Anuj; Foerster, Jakob N.; Whiteson, Shimon (2022). "SMACv2: An Improved Benchmark for Cooperative Multi-Agent Reinforcement Learning". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2212.07489">2212.07489</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=SMACv2%3A+An+Improved+Benchmark+for+Cooperative+Multi-Agent+Reinforcement+Learning&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2212.07489&amp;rft.aulast=Ellis&amp;rft.aufirst=Benjamin&amp;rft.au=Moalla%2C+Skander&amp;rft.au=Samvelyan%2C+Mikayel&amp;rft.au=Sun%2C+Mingfei&amp;rft.au=Mahajan%2C+Anuj&amp;rft.au=Foerster%2C+Jakob+N.&amp;rft.au=Whiteson%2C+Shimon&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-22"><span class="mw-cite-backlink"><b><a href="#cite_ref-22">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSandholmCrites1996" class="citation journal cs1">Sandholm, Toumas W.; Crites, Robert H. (1996). "Multiagent reinforcement learning in the Iterated Prisoner's Dilemma". <i>Biosystems</i>. <b>37</b> (1–2): 147–166. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/1996BiSys..37..147S">1996BiSys..37..147S</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2F0303-2647%2895%2901551-5">10.1016/0303-2647(95)01551-5</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/8924633">8924633</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Biosystems&amp;rft.atitle=Multiagent+reinforcement+learning+in+the+Iterated+Prisoner%27s+Dilemma&amp;rft.volume=37&amp;rft.issue=1%E2%80%932&amp;rft.pages=147-166&amp;rft.date=1996&amp;rft_id=info%3Apmid%2F8924633&amp;rft_id=info%3Adoi%2F10.1016%2F0303-2647%2895%2901551-5&amp;rft_id=info%3Abibcode%2F1996BiSys..37..147S&amp;rft.aulast=Sandholm&amp;rft.aufirst=Toumas+W.&amp;rft.au=Crites%2C+Robert+H.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-23">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFPeysakhovichLerer2018" class="citation conference cs1">Peysakhovich, Alexander; Lerer, Adam (2018). "Prosocial Learning Agents Solve Generalized Stag Hunts Better than Selfish Ones". <i>AAMAS 2018</i>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1709.02865">1709.02865</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Prosocial+Learning+Agents+Solve+Generalized+Stag+Hunts+Better+than+Selfish+Ones&amp;rft.btitle=AAMAS+2018&amp;rft.date=2018&amp;rft_id=info%3Aarxiv%2F1709.02865&amp;rft.aulast=Peysakhovich&amp;rft.aufirst=Alexander&amp;rft.au=Lerer%2C+Adam&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-24">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDafoeHughesBachrach2020" class="citation conference cs1">Dafoe, Allan; Hughes, Edward; Bachrach, Yoram; et&#160;al. (2020). "Open Problems in Cooperative AI". <i>NeurIPS 2020</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2012.08630">2012.08630</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Open+Problems+in+Cooperative+AI&amp;rft.btitle=NeurIPS+2020&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F2012.08630&amp;rft.aulast=Dafoe&amp;rft.aufirst=Allan&amp;rft.au=Hughes%2C+Edward&amp;rft.au=Bachrach%2C+Yoram&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-25"><span class="mw-cite-backlink"><b><a href="#cite_ref-25">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKösterHadfield-MenellHadfieldLeibo" class="citation conference cs1">Köster, Raphael; Hadfield-Menell, Dylan; Hadfield, Gillian K.; Leibo, Joel Z. "Silly rules improve the capacity of agents to learn stable enforcement and compliance behaviors". <i>AAMAS 2020</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2001.09318">2001.09318</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Silly+rules+improve+the+capacity+of+agents+to+learn+stable+enforcement+and+compliance+behaviors&amp;rft.btitle=AAMAS+2020&amp;rft_id=info%3Aarxiv%2F2001.09318&amp;rft.aulast=K%C3%B6ster&amp;rft.aufirst=Raphael&amp;rft.au=Hadfield-Menell%2C+Dylan&amp;rft.au=Hadfield%2C+Gillian+K.&amp;rft.au=Leibo%2C+Joel+Z.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-26"><span class="mw-cite-backlink"><b><a href="#cite_ref-26">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeiboZambaldiLanctotMarecki2017" class="citation conference cs1">Leibo, Joel Z.; Zambaldi, Vinicius; Lanctot, Marc; Marecki, Janusz; Graepel, Thore (2017). 
"Multi-agent Reinforcement Learning in Sequential Social Dilemmas". <i>AAMAS 2017</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1702.03037">1702.03037</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Multi-agent+Reinforcement+Learning+in+Sequential+Social+Dilemmas&amp;rft.btitle=AAMAS+2017&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1702.03037&amp;rft.aulast=Leibo&amp;rft.aufirst=Joel+Z.&amp;rft.au=Zambaldi%2C+Vinicius&amp;rft.au=Lanctot%2C+Marc&amp;rft.au=Marecki%2C+Janusz&amp;rft.au=Graepel%2C+Thore&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-27">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBadjatiyaSarkar2020" class="citation arxiv cs1">Badjatiya, Pinkesh; Sarkar, Mausoom (2020). "Inducing Cooperative behaviour in Sequential-Social dilemmas through Multi-Agent Reinforcement Learning using Status-Quo Loss". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2001.05458">2001.05458</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Inducing+Cooperative+behaviour+in+Sequential-Social+dilemmas+through+Multi-Agent+Reinforcement+Learning+using+Status-Quo+Loss&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F2001.05458&amp;rft.aulast=Badjatiya&amp;rft.aufirst=Pinkesh&amp;rft.au=Sarkar%2C+Mausoom&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-28"><span class="mw-cite-backlink"><b><a href="#cite_ref-28">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeiboHughes2019" class="citation arxiv cs1">Leibo, Joel Z.; Hughes, Edward; et&#160;al. (2019). "Autocurricula and the Emergence of Innovation from Social Interaction: A Manifesto for Multi-Agent Intelligence Research". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1903.00742v2">1903.00742v2</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Autocurricula+and+the+Emergence+of+Innovation+from+Social+Interaction%3A+A+Manifesto+for+Multi-Agent+Intelligence+Research&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1903.00742v2&amp;rft.aulast=Leibo&amp;rft.aufirst=Joel+Z.&amp;rft.au=Hughes%2C+Edward&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-29">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBaker2020" class="citation conference cs1">Baker, Bowen; et&#160;al. (2020). "Emergent Tool Use From Multi-Agent Autocurricula". <i>ICLR 2020</i>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1909.07528">1909.07528</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Emergent+Tool+Use+From+Multi-Agent+Autocurricula&amp;rft.btitle=ICLR+2020&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F1909.07528&amp;rft.aulast=Baker&amp;rft.aufirst=Bowen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-30"><span class="mw-cite-backlink"><b><a href="#cite_ref-30">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKastingSiefert2002" class="citation journal cs1">Kasting, James F; Siefert, Janet L (2002). "Life and the evolution of earth's atmosphere". <i>Science</i>. <b>296</b> (5570): 1066–1068. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2002Sci...296.1066K">2002Sci...296.1066K</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1126%2Fscience.1071184">10.1126/science.1071184</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/12004117">12004117</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:37190778">37190778</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Science&amp;rft.atitle=Life+and+the+evolution+of+earth%27s+atmosphere&amp;rft.volume=296&amp;rft.issue=5570&amp;rft.pages=1066-1068&amp;rft.date=2002&amp;rft_id=info%3Adoi%2F10.1126%2Fscience.1071184&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A37190778%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F12004117&amp;rft_id=info%3Abibcode%2F2002Sci...296.1066K&amp;rft.aulast=Kasting&amp;rft.aufirst=James+F&amp;rft.au=Siefert%2C+Janet+L&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-31"><span class="mw-cite-backlink"><b><a href="#cite_ref-31">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFClark2008" class="citation book cs1">Clark, Gregory (2008). <i>A farewell to alms: a brief economic history of the world</i>. Princeton University Press. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-0-691-14128-2" title="Special:BookSources/978-0-691-14128-2"><bdi>978-0-691-14128-2</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=A+farewell+to+alms%3A+a+brief+economic+history+of+the+world&amp;rft.pub=Princeton+University+Press&amp;rft.date=2008&amp;rft.isbn=978-0-691-14128-2&amp;rft.aulast=Clark&amp;rft.aufirst=Gregory&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-Li_2022-32"><span class="mw-cite-backlink">^ <a href="#cite_ref-Li_2022_32-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Li_2022_32-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-Li_2022_32-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-Li_2022_32-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-Li_2022_32-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-Li_2022_32-5"><sup><i><b>f</b></i></sup></a> <a href="#cite_ref-Li_2022_32-6"><sup><i><b>g</b></i></sup></a> <a href="#cite_ref-Li_2022_32-7"><sup><i><b>h</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLiZhuLuongNiyato2021" class="citation arxiv cs1">Li, Tianxu; Zhu, Kun; Luong, Nguyen Cong; Niyato, Dusit; Wu, Qihui; Zhang, Yang; Chen, Bing (2021). "Applications of Multi-Agent Reinforcement Learning in Future Internet: A Comprehensive Survey". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2110.13484">2110.13484</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AI">cs.AI</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Applications+of+Multi-Agent+Reinforcement+Learning+in+Future+Internet%3A+A+Comprehensive+Survey&amp;rft.date=2021&amp;rft_id=info%3Aarxiv%2F2110.13484&amp;rft.aulast=Li&amp;rft.aufirst=Tianxu&amp;rft.au=Zhu%2C+Kun&amp;rft.au=Luong%2C+Nguyen+Cong&amp;rft.au=Niyato%2C+Dusit&amp;rft.au=Wu%2C+Qihui&amp;rft.au=Zhang%2C+Yang&amp;rft.au=Chen%2C+Bing&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-33"><span class="mw-cite-backlink"><b><a href="#cite_ref-33">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeRathourYamazakiLuu2021" class="citation arxiv cs1">Le, Ngan; Rathour, Vidhiwar Singh; Yamazaki, Kashu; Luu, Khoa; Savvides, Marios (2021). "Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2108.11510">2108.11510</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CV">cs.CV</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Deep+Reinforcement+Learning+in+Computer+Vision%3A+A+Comprehensive+Survey&amp;rft.date=2021&amp;rft_id=info%3Aarxiv%2F2108.11510&amp;rft.aulast=Le&amp;rft.aufirst=Ngan&amp;rft.au=Rathour%2C+Vidhiwar+Singh&amp;rft.au=Yamazaki%2C+Kashu&amp;rft.au=Luu%2C+Khoa&amp;rft.au=Savvides%2C+Marios&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-34"><span class="mw-cite-backlink"><b><a href="#cite_ref-34">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFMoulin-FrierOudeyer2020" class="citation arxiv cs1">Moulin-Frier, Clément; Oudeyer, Pierre-Yves (2020). "Multi-Agent Reinforcement Learning as a Computational Tool for Language Evolution Research: Historical Context and Future Challenges". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2002.08878">2002.08878</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.MA">cs.MA</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Multi-Agent+Reinforcement+Learning+as+a+Computational+Tool+for+Language+Evolution+Research%3A+Historical+Context+and+Future+Challenges&amp;rft.date=2020&amp;rft_id=info%3Aarxiv%2F2002.08878&amp;rft.aulast=Moulin-Frier&amp;rft.aufirst=Cl%C3%A9ment&amp;rft.au=Oudeyer%2C+Pierre-Yves&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-35"><span class="mw-cite-backlink"><b><a href="#cite_ref-35">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKillianXuBiswasVerma2023" class="citation conference cs1">Killian, Jackson; Xu, Lily; Biswas, Arpita; Verma, Shresth; et&#160;al. (2023). <i>Robust Planning over Restless Groups: Engagement Interventions for a Large-Scale Maternal Telehealth Program</i>. AAAI.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Robust+Planning+over+Restless+Groups%3A+Engagement+Interventions+for+a+Large-Scale+Maternal+Telehealth+Program&amp;rft.date=2023&amp;rft.aulast=Killian&amp;rft.aufirst=Jackson&amp;rft.au=Xu%2C+Lily&amp;rft.au=Biswas%2C+Arpita&amp;rft.au=Verma%2C+Shresth&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-36"><span class="mw-cite-backlink"><b><a href="#cite_ref-36">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKrishnanJaquesOmidshafieiZhang2022" class="citation arxiv cs1">Krishnan, Srivatsan; Jaques, Natasha; Omidshafiei, Shayegan; Zhang, Dan; Gur, Izzeddin; Reddi, Vijay Janapa; Faust, Aleksandra (2022). "Multi-Agent Reinforcement Learning for Microprocessor Design Space Exploration". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2211.16385">2211.16385</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AR">cs.AR</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Multi-Agent+Reinforcement+Learning+for+Microprocessor+Design+Space+Exploration&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2211.16385&amp;rft.aulast=Krishnan&amp;rft.aufirst=Srivatsan&amp;rft.au=Jaques%2C+Natasha&amp;rft.au=Omidshafiei%2C+Shayegan&amp;rft.au=Zhang%2C+Dan&amp;rft.au=Gur%2C+Izzeddin&amp;rft.au=Reddi%2C+Vijay+Janapa&amp;rft.au=Faust%2C+Aleksandra&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-37"><span class="mw-cite-backlink"><b><a href="#cite_ref-37">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLiHeLiShi2023" class="citation journal cs1">Li, Yuanzheng; He, Shangyang; Li, Yang; Shi, Yang; Zeng, Zhigang (2023). "Federated Multiagent Deep Reinforcement Learning Approach via Physics-Informed Reward for Multimicrogrid Energy Management". <i>IEEE Transactions on Neural Networks and Learning Systems</i>. <b>PP</b> (5): 5902–5914. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2301.00641">2301.00641</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FTNNLS.2022.3232630">10.1109/TNNLS.2022.3232630</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/37018258">37018258</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:255372287">255372287</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE+Transactions+on+Neural+Networks+and+Learning+Systems&amp;rft.atitle=Federated+Multiagent+Deep+Reinforcement+Learning+Approach+via+Physics-Informed+Reward+for+Multimicrogrid+Energy+Management&amp;rft.volume=PP&amp;rft.issue=5&amp;rft.pages=5902-5914&amp;rft.date=2023&amp;rft_id=info%3Aarxiv%2F2301.00641&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A255372287%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F37018258&amp;rft_id=info%3Adoi%2F10.1109%2FTNNLS.2022.3232630&amp;rft.aulast=Li&amp;rft.aufirst=Yuanzheng&amp;rft.au=He%2C+Shangyang&amp;rft.au=Li%2C+Yang&amp;rft.au=Shi%2C+Yang&amp;rft.au=Zeng%2C+Zhigang&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-38"><span class="mw-cite-backlink"><b><a href="#cite_ref-38">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFCiLiuPanZhong2023" class="citation conference cs1">Ci, Hai; Liu, Mickel; Pan, Xuehai; Zhong, Fangwei; Wang, Yizhou (2023). <a rel="nofollow" class="external text" href="https://openreview.net/forum?id=CPIy9TWFYBG"><i>Proactive Multi-Camera Collaboration for 3D Human Pose Estimation</i></a>. International Conference on Learning Representations.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Proactive+Multi-Camera+Collaboration+for+3D+Human+Pose+Estimation&amp;rft.date=2023&amp;rft.aulast=Ci&amp;rft.aufirst=Hai&amp;rft.au=Liu%2C+Mickel&amp;rft.au=Pan%2C+Xuehai&amp;rft.au=Zhong%2C+Fangwei&amp;rft.au=Wang%2C+Yizhou&amp;rft_id=https%3A%2F%2Fopenreview.net%2Fforum%3Fid%3DCPIy9TWFYBG&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> <li id="cite_note-39"><span class="mw-cite-backlink"><b><a href="#cite_ref-39">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFVinitskyKreidiehLe_FlemKheterpal2018" class="citation conference cs1">Vinitsky, Eugene; Kreidieh, Aboudy; Le Flem, Luc; Kheterpal, Nishant; Jang, Kathy; Wu, Fangyu; Liaw, Richard; Liang, Eric; Bayen, Alexandre M. (2018). <a rel="nofollow" class="external text" href="http://proceedings.mlr.press/v87/vinitsky18a/vinitsky18a.pdf"><i>Benchmarks for reinforcement learning in mixed-autonomy traffic</i></a> <span class="cs1-format">(PDF)</span>. 
40. Tuyls, Karl; Omidshafiei, Shayegan; Muller, Paul; Wang, Zhe; et al. (2020). "Game Plan: What AI can do for Football, and What Football can do for AI". arXiv:2011.09192 [cs.AI].
41. Chu, Tianshu; Wang, Jie; Codecà, Lara; Li, Zhaojian (2019). "Multi-Agent Deep Reinforcement Learning for Large-scale Traffic Signal Control". arXiv:1903.04527 [cs.LG].
42. Belletti, Francois; Haziza, Daniel; Gomes, Gabriel; Bayen, Alexandre M. (2017). "Expert Level control of Ramp Metering based on Multi-task Deep Reinforcement Learning". arXiv:1701.08832 [cs.AI].
43. Ding, Yahao; Yang, Zhaohui; Pham, Quoc-Viet; Zhang, Zhaoyang; Shikh-Bahaei, Mohammad (2023). "Distributed Machine Learning for UAV Swarms: Computing, Sensing, and Semantics". arXiv:2301.00912 [cs.LG].
44. Xu, Lily; Perrault, Andrew; Fang, Fei; Chen, Haipeng; Tambe, Milind (2021). "Robust Reinforcement Learning Under Minimax Regret for Green Security". arXiv:2106.08413 [cs.LG].
45. Leike, Jan; Martic, Miljan; Krakovna, Victoria; Ortega, Pedro A.; Everitt, Tom; Lefrancq, Andrew; Orseau, Laurent; Legg, Shane (2017). "AI Safety Gridworlds". arXiv:1711.09883 [cs.AI].
46. Hadfield-Menell, Dylan; Dragan, Anca; Abbeel, Pieter; Russell, Stuart (2016). "The Off-Switch Game". arXiv:1611.08219 [cs.AI].
47. Hernandez-Leal, Pablo; Kartal, Bilal; Taylor, Matthew E. (1 November 2019). "A survey and critique of multiagent deep reinforcement learning". Autonomous Agents and Multi-Agent Systems. 33 (6): 750–797. arXiv:1810.05587. doi:10.1007/s10458-019-09421-1. ISSN 1573-7454.
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:52981002">52981002</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Autonomous+Agents+and+Multi-Agent+Systems&amp;rft.atitle=A+survey+and+critique+of+multiagent+deep+reinforcement+learning&amp;rft.volume=33&amp;rft.issue=6&amp;rft.pages=750-797&amp;rft.date=2019-11-01&amp;rft_id=info%3Aarxiv%2F1810.05587&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A52981002%23id-name%3DS2CID&amp;rft.issn=1573-7454&amp;rft_id=info%3Adoi%2F10.1007%2Fs10458-019-09421-1&amp;rft.aulast=Hernandez-Leal&amp;rft.aufirst=Pablo&amp;rft.au=Kartal%2C+Bilal&amp;rft.au=Taylor%2C+Matthew+E.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AMulti-agent+reinforcement+learning" class="Z3988"></span></span> </li> </ol></div></div> <!-- NewPP limit report Parsed by mw‐web.eqiad.main‐5dc468848‐p7l2r Cached time: 20241122151753 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.761 seconds Real time usage: 0.992 seconds Preprocessor visited node count: 3630/1000000 Post‐expand include size: 155208/2097152 bytes Template argument size: 7803/2097152 bytes Highest expansion depth: 12/100 Expensive parser function count: 1/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 184431/5000000 bytes Lua time usage: 0.498/10.000 seconds Lua memory usage: 6407012/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 794.099 1 -total 44.11% 350.267 1 Template:Reflist 30.25% 240.235 21 Template:Cite_arXiv 16.44% 130.588 1 Template:Short_description 15.06% 119.607 1 Template:Machine_learning 13.37% 106.140 1 Template:Sidebar_with_collapsible_lists 12.73% 101.066 16 Template:Cite_conference 11.14% 88.487 2 Template:Pagetype 6.81% 54.083 9 Template:Cite_journal 5.41% 42.964 1 Template:Scholia --> <!-- Saved in parser cache with key enwiki:pcache:idhash:62285602-0!canonical and timestamp 20241122151753 and revision id 1236297949. 