CINXE.COM
Online machine learning - Wikipedia
<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Online machine learning - Wikipedia</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"096a0cf5-67d9-44a9-85df-f9490b7f46cb","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Online_machine_learning","wgTitle":"Online machine learning","wgCurRevisionId":1243080168,"wgRevisionId":1243080168,"wgArticleId":19892153,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description matches Wikidata","All articles with unsourced statements","Articles with unsourced statements from September 2019","Online algorithms","Machine learning algorithms"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Online_machine_learning","wgRelevantArticleId":19892153,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true, "wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":30000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q7094097","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false, "wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.math.styles":"ready","ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","ext.popups","ext.visualEditor.desktopArticleTarget.init", "ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.math.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Online machine learning - Wikipedia"> <meta property="og:type" content="website"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Online_machine_learning"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Online_machine_learning&action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Online_machine_learning"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Online_machine_learning rootpage-Online_machine_learning skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Online+machine+learning" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Online+machine+learning" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Online+machine+learning" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Online+machine+learning" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Introduction" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Introduction"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Introduction</span> </div> </a> <ul id="toc-Introduction-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Statistical_view_of_online_learning" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Statistical_view_of_online_learning"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Statistical view of online learning</span> </div> </a> <button aria-controls="toc-Statistical_view_of_online_learning-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Statistical view of online learning subsection</span> </button> <ul id="toc-Statistical_view_of_online_learning-sublist" class="vector-toc-list"> <li id="toc-Example:_linear_least_squares" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Example:_linear_least_squares"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Example: linear least squares</span> </div> </a> <ul id="toc-Example:_linear_least_squares-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Batch_learning" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Batch_learning"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>Batch learning</span> </div> </a> <ul id="toc-Batch_learning-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Online_learning:_recursive_least_squares" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Online_learning:_recursive_least_squares"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Online learning: recursive least squares</span> </div> </a> <ul id="toc-Online_learning:_recursive_least_squares-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Stochastic_gradient_descent" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Stochastic_gradient_descent"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.4</span> <span>Stochastic gradient descent</span> </div> </a> <ul id="toc-Stochastic_gradient_descent-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Incremental_stochastic_gradient_descent" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Incremental_stochastic_gradient_descent"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.5</span> <span>Incremental stochastic gradient descent</span> </div> </a> <ul id="toc-Incremental_stochastic_gradient_descent-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Kernel_methods" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Kernel_methods"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.6</span> <span>Kernel methods</span> </div> </a> <ul id="toc-Kernel_methods-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Online_convex_optimization" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Online_convex_optimization"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.7</span> <span>Online convex optimization</span> </div> </a> <ul id="toc-Online_convex_optimization-sublist" class="vector-toc-list"> <li id="toc-Follow_the_leader_(FTL)" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Follow_the_leader_(FTL)"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.7.1</span> <span>Follow the leader (FTL)</span> </div> </a> <ul id="toc-Follow_the_leader_(FTL)-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Follow_the_regularised_leader_(FTRL)" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Follow_the_regularised_leader_(FTRL)"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.7.2</span> <span>Follow the regularised leader (FTRL)</span> </div> </a> <ul id="toc-Follow_the_regularised_leader_(FTRL)-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Online_subgradient_descent_(OSD)" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Online_subgradient_descent_(OSD)"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.8</span> <span>Online subgradient descent (OSD)</span> </div> </a> <ul id="toc-Online_subgradient_descent_(OSD)-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Other_algorithms" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Other_algorithms"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.9</span> <span>Other algorithms</span> </div> </a> <ul id="toc-Other_algorithms-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Continual_learning" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Continual_learning"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Continual learning</span> </div> </a> <ul id="toc-Continual_learning-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Interpretations_of_online_learning" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Interpretations_of_online_learning"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Interpretations of online learning</span> </div> </a> <ul id="toc-Interpretations_of_online_learning-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Implementations" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Implementations"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Implementations</span> </div> </a> <ul id="toc-Implementations-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Online machine learning</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 12 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-12" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">12 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ar mw-list-item"><a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B9%D9%84%D9%85_%D8%A7%D9%84%D8%A2%D9%84%D8%A9_%D8%A7%D9%84%D9%85%D8%B3%D8%AA%D9%85%D8%B1" title="تعلم الآلة المستمر – Arabic" lang="ar" hreflang="ar" data-title="تعلم الآلة المستمر" data-language-autonym="العربية" data-language-local-name="Arabic" class="interlanguage-link-target"><span>العربية</span></a></li><li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Aprenentatge_autom%C3%A0tic_en_l%C3%ADnia" title="Aprenentatge automàtic en línia – Catalan" lang="ca" hreflang="ca" data-title="Aprenentatge automàtic en línia" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Aprendizaje_autom%C3%A1tico_online" title="Aprendizaje automático online – Spanish" lang="es" hreflang="es" data-title="Aprendizaje automático online" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%DB%8C%D8%A7%D8%AF%DA%AF%DB%8C%D8%B1%DB%8C_%D9%85%D8%A7%D8%B4%DB%8C%D9%86_%D8%A8%D8%B1%D8%AE%D8%B7" title="یادگیری ماشین برخط – Persian" lang="fa" hreflang="fa" data-title="یادگیری ماشین برخط" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Apprentissage_incr%C3%A9mental" title="Apprentissage incrémental – French" lang="fr" hreflang="fr" data-title="Apprentissage incrémental" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-id mw-list-item"><a href="https://id.wikipedia.org/wiki/Pemelajaran_mesin_daring" title="Pemelajaran mesin daring – Indonesian" lang="id" hreflang="id" data-title="Pemelajaran mesin daring" data-language-autonym="Bahasa Indonesia" data-language-local-name="Indonesian" class="interlanguage-link-target"><span>Bahasa Indonesia</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/Aprendizagem_de_m%C3%A1quina_online" title="Aprendizagem de máquina online – Portuguese" lang="pt" hreflang="pt" data-title="Aprendizagem de máquina online" data-language-autonym="Português" data-language-local-name="Portuguese" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/%D0%9E%D0%BD%D0%BB%D0%B0%D0%B9%D0%BD%D0%BE%D0%B2%D0%BE%D0%B5_%D0%BC%D0%B0%D1%88%D0%B8%D0%BD%D0%BD%D0%BE%D0%B5_%D0%BE%D0%B1%D1%83%D1%87%D0%B5%D0%BD%D0%B8%D0%B5" title="Онлайновое машинное обучение – Russian" lang="ru" hreflang="ru" data-title="Онлайновое машинное обучение" data-language-autonym="Русский" data-language-local-name="Russian" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-sr mw-list-item"><a href="https://sr.wikipedia.org/wiki/%D0%9C%D0%B0%D1%88%D0%B8%D0%BD%D1%81%D0%BA%D0%BE_%D1%83%D1%87%D0%B5%D1%9A%D0%B5_%D0%BD%D0%B0_%D0%BC%D1%80%D0%B5%D0%B6%D0%B8" title="Машинско учење на мрежи – Serbian" lang="sr" hreflang="sr" data-title="Машинско учење на мрежи" data-language-autonym="Српски / srpski" data-language-local-name="Serbian" class="interlanguage-link-target"><span>Српски / srpski</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%86%D0%BD%D1%82%D0%B5%D1%80%D0%B0%D0%BA%D1%82%D0%B8%D0%B2%D0%BD%D0%B5_%D0%BC%D0%B0%D1%88%D0%B8%D0%BD%D0%BD%D0%B5_%D0%BD%D0%B0%D0%B2%D1%87%D0%B0%D0%BD%D0%BD%D1%8F" title="Інтерактивне машинне навчання – Ukrainian" lang="uk" hreflang="uk" data-title="Інтерактивне машинне навчання" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E5%9C%A8%E7%B7%9A%E6%A9%9F%E6%A2%B0%E5%AD%B8%E7%BF%92" title="在線機械學習 – Cantonese" lang="yue" hreflang="yue" data-title="在線機械學習" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E7%B7%9A%E4%B8%8A%E6%A9%9F%E5%99%A8%E5%AD%B8%E7%BF%92" title="線上機器學習 – Chinese" lang="zh" hreflang="zh" data-title="線上機器學習" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q7094097#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Online_machine_learning" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Online_machine_learning" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Online_machine_learning"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Online_machine_learning&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Online_machine_learning&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Online_machine_learning"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Online_machine_learning&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Online_machine_learning&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Online_machine_learning" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Online_machine_learning" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Online_machine_learning&oldid=1243080168" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Online_machine_learning&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Online_machine_learning&id=1243080168&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FOnline_machine_learning"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FOnline_machine_learning"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Online_machine_learning&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Online_machine_learning&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q7094097" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">Not to be confused with <a href="/wiki/Online_and_offline" title="Online and offline">online and offline</a>.</div> <div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Method of machine learning</div> <style data-mw-deduplicate="TemplateStyles:r1244144826">.mw-parser-output .machine-learning-list-title{background-color:#ddddff}html.skin-theme-clientpref-night .mw-parser-output .machine-learning-list-title{background-color:#222}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .machine-learning-list-title{background-color:#222}}</style> <style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1246091330">.mw-parser-output .sidebar{width:22em;float:right;clear:right;margin:0.5em 0 1em 1em;background:var(--background-color-neutral-subtle,#f8f9fa);border:1px solid var(--border-color-base,#a2a9b1);padding:0.2em;text-align:center;line-height:1.4em;font-size:88%;border-collapse:collapse;display:table}body.skin-minerva .mw-parser-output .sidebar{display:table!important;float:right!important;margin:0.5em 0 1em 1em!important}.mw-parser-output .sidebar-subgroup{width:100%;margin:0;border-spacing:0}.mw-parser-output .sidebar-left{float:left;clear:left;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-none{float:none;clear:both;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-outer-title{padding:0 0.4em 0.2em;font-size:125%;line-height:1.2em;font-weight:bold}.mw-parser-output .sidebar-top-image{padding:0.4em}.mw-parser-output .sidebar-top-caption,.mw-parser-output .sidebar-pretitle-with-top-image,.mw-parser-output .sidebar-caption{padding:0.2em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-pretitle{padding:0.4em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-title,.mw-parser-output .sidebar-title-with-pretitle{padding:0.2em 0.8em;font-size:145%;line-height:1.2em}.mw-parser-output .sidebar-title-with-pretitle{padding:0.1em 0.4em}.mw-parser-output .sidebar-image{padding:0.2em 0.4em 0.4em}.mw-parser-output .sidebar-heading{padding:0.1em 0.4em}.mw-parser-output .sidebar-content{padding:0 0.5em 0.4em}.mw-parser-output .sidebar-content-with-subgroup{padding:0.1em 0.4em 0.2em}.mw-parser-output .sidebar-above,.mw-parser-output .sidebar-below{padding:0.3em 0.8em;font-weight:bold}.mw-parser-output .sidebar-collapse .sidebar-above,.mw-parser-output .sidebar-collapse .sidebar-below{border-top:1px solid #aaa;border-bottom:1px solid #aaa}.mw-parser-output .sidebar-navbar{text-align:right;font-size:115%;padding:0 0.4em 0.4em}.mw-parser-output .sidebar-list-title{padding:0 0.4em;text-align:left;font-weight:bold;line-height:1.6em;font-size:105%}.mw-parser-output .sidebar-list-title-c{padding:0 0.4em;text-align:center;margin:0 3.3em}@media(max-width:640px){body.mediawiki .mw-parser-output .sidebar{width:100%!important;clear:both;float:none!important;margin-left:0!important;margin-right:0!important}}body.skin--responsive .mw-parser-output .sidebar a>img{max-width:none!important}@media screen{html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media print{body.ns-0 .mw-parser-output .sidebar{display:none!important}}</style><style data-mw-deduplicate="TemplateStyles:r886047488">.mw-parser-output .nobold{font-weight:normal}</style><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r886047488"><table class="sidebar sidebar-collapse nomobile nowraplinks"><tbody><tr><td class="sidebar-pretitle">Part of a series on</td></tr><tr><th class="sidebar-title-with-pretitle"><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a><br />and <a href="/wiki/Data_mining" title="Data mining">data mining</a></th></tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Paradigms</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a></li> <li><a href="/wiki/Unsupervised_learning" title="Unsupervised learning">Unsupervised learning</a></li> <li><a href="/wiki/Semi-supervised_learning" class="mw-redirect" title="Semi-supervised learning">Semi-supervised learning</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></li> <li><a href="/wiki/Meta-learning_(computer_science)" title="Meta-learning (computer science)">Meta-learning</a></li> <li><a class="mw-selflink selflink">Online learning</a></li> <li><a href="/wiki/Batch_learning" class="mw-redirect" title="Batch learning">Batch learning</a></li> <li><a href="/wiki/Curriculum_learning" title="Curriculum learning">Curriculum learning</a></li> <li><a href="/wiki/Rule-based_machine_learning" title="Rule-based machine learning">Rule-based learning</a></li> <li><a href="/wiki/Neuro-symbolic_AI" title="Neuro-symbolic AI">Neuro-symbolic AI</a></li> <li><a href="/wiki/Neuromorphic_engineering" class="mw-redirect" title="Neuromorphic engineering">Neuromorphic engineering</a></li> <li><a href="/wiki/Quantum_machine_learning" title="Quantum machine learning">Quantum machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Problems</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Statistical_classification" title="Statistical classification">Classification</a></li> <li><a href="/wiki/Generative_model" title="Generative model">Generative modeling</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></li> <li><a href="/wiki/Density_estimation" title="Density estimation">Density estimation</a></li> <li><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></li> <li><a href="/wiki/Data_cleaning" class="mw-redirect" title="Data cleaning">Data cleaning</a></li> <li><a href="/wiki/Automated_machine_learning" title="Automated machine learning">AutoML</a></li> <li><a href="/wiki/Association_rule_learning" title="Association rule learning">Association rules</a></li> <li><a href="/wiki/Semantic_analysis_(machine_learning)" title="Semantic analysis (machine learning)">Semantic analysis</a></li> <li><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></li> <li><a href="/wiki/Feature_engineering" title="Feature engineering">Feature engineering</a></li> <li><a href="/wiki/Feature_learning" title="Feature learning">Feature learning</a></li> <li><a href="/wiki/Learning_to_rank" title="Learning to rank">Learning to rank</a></li> <li><a href="/wiki/Grammar_induction" title="Grammar induction">Grammar induction</a></li> <li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li> <li><a href="/wiki/Multimodal_learning" title="Multimodal learning">Multimodal learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><div style="display: inline-block; line-height: 1.2em; padding: .1em 0;"><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a><br /><span class="nobold"><span style="font-size:85%;">(<b><a href="/wiki/Statistical_classification" title="Statistical classification">classification</a></b> • <b><a href="/wiki/Regression_analysis" title="Regression analysis">regression</a></b>)</span></span> </div></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Apprenticeship_learning" title="Apprenticeship learning">Apprenticeship learning</a></li> <li><a href="/wiki/Decision_tree_learning" title="Decision tree learning">Decision trees</a></li> <li><a href="/wiki/Ensemble_learning" title="Ensemble learning">Ensembles</a> <ul><li><a href="/wiki/Bootstrap_aggregating" title="Bootstrap aggregating">Bagging</a></li> <li><a href="/wiki/Boosting_(machine_learning)" title="Boosting (machine learning)">Boosting</a></li> <li><a href="/wiki/Random_forest" title="Random forest">Random forest</a></li></ul></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Linear_regression" title="Linear regression">Linear regression</a></li> <li><a href="/wiki/Naive_Bayes_classifier" title="Naive Bayes classifier">Naive Bayes</a></li> <li><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural networks</a></li> <li><a href="/wiki/Logistic_regression" title="Logistic regression">Logistic regression</a></li> <li><a href="/wiki/Perceptron" title="Perceptron">Perceptron</a></li> <li><a href="/wiki/Relevance_vector_machine" title="Relevance vector machine">Relevance vector machine (RVM)</a></li> <li><a href="/wiki/Support_vector_machine" title="Support vector machine">Support vector machine (SVM)</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/BIRCH" title="BIRCH">BIRCH</a></li> <li><a href="/wiki/CURE_algorithm" title="CURE algorithm">CURE</a></li> <li><a href="/wiki/Hierarchical_clustering" title="Hierarchical clustering">Hierarchical</a></li> <li><a href="/wiki/K-means_clustering" title="K-means clustering"><i>k</i>-means</a></li> <li><a href="/wiki/Fuzzy_clustering" title="Fuzzy clustering">Fuzzy</a></li> <li><a href="/wiki/Expectation%E2%80%93maximization_algorithm" title="Expectation–maximization algorithm">Expectation–maximization (EM)</a></li> <li><br /><a href="/wiki/DBSCAN" title="DBSCAN">DBSCAN</a></li> <li><a href="/wiki/OPTICS_algorithm" title="OPTICS algorithm">OPTICS</a></li> <li><a href="/wiki/Mean_shift" title="Mean shift">Mean shift</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Factor_analysis" title="Factor analysis">Factor analysis</a></li> <li><a href="/wiki/Canonical_correlation" title="Canonical correlation">CCA</a></li> <li><a href="/wiki/Independent_component_analysis" title="Independent component analysis">ICA</a></li> <li><a href="/wiki/Linear_discriminant_analysis" title="Linear discriminant analysis">LDA</a></li> <li><a href="/wiki/Non-negative_matrix_factorization" title="Non-negative matrix factorization">NMF</a></li> <li><a href="/wiki/Principal_component_analysis" title="Principal component analysis">PCA</a></li> <li><a href="/wiki/Proper_generalized_decomposition" title="Proper generalized decomposition">PGD</a></li> <li><a href="/wiki/T-distributed_stochastic_neighbor_embedding" title="T-distributed stochastic neighbor embedding">t-SNE</a></li> <li><a href="/wiki/Sparse_dictionary_learning" title="Sparse dictionary learning">SDL</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Graphical_model" title="Graphical model">Graphical models</a> <ul><li><a href="/wiki/Bayesian_network" title="Bayesian network">Bayes net</a></li> <li><a href="/wiki/Conditional_random_field" title="Conditional random field">Conditional random field</a></li> <li><a href="/wiki/Hidden_Markov_model" title="Hidden Markov model">Hidden Markov</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Random_sample_consensus" title="Random sample consensus">RANSAC</a></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Local_outlier_factor" title="Local outlier factor">Local outlier factor</a></li> <li><a href="/wiki/Isolation_forest" title="Isolation forest">Isolation forest</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural network</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">Feedforward neural network</a></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network</a> <ul><li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">LSTM</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">GRU</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">ESN</a></li> <li><a href="/wiki/Reservoir_computing" title="Reservoir computing">reservoir computing</a></li></ul></li> <li><a href="/wiki/Boltzmann_machine" title="Boltzmann machine">Boltzmann machine</a> <ul><li><a href="/wiki/Restricted_Boltzmann_machine" title="Restricted Boltzmann machine">Restricted</a></li></ul></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">GAN</a></li> <li><a href="/wiki/Diffusion_model" title="Diffusion model">Diffusion model</a></li> <li><a href="/wiki/Self-organizing_map" title="Self-organizing map">SOM</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network</a> <ul><li><a href="/wiki/U-Net" title="U-Net">U-Net</a></li> <li><a href="/wiki/LeNet" title="LeNet">LeNet</a></li> <li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/DeepDream" title="DeepDream">DeepDream</a></li></ul></li> <li><a href="/wiki/Neural_radiance_field" title="Neural radiance field">Neural radiance field</a></li> <li><a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision</a></li></ul></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Spiking_neural_network" title="Spiking neural network">Spiking neural network</a></li> <li><a href="/wiki/Memtransistor" title="Memtransistor">Memtransistor</a></li> <li><a href="/wiki/Electrochemical_RAM" title="Electrochemical RAM">Electrochemical RAM</a> (ECRAM)</li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Temporal_difference_learning" title="Temporal difference learning">Temporal difference (TD)</a></li> <li><a href="/wiki/Multi-agent_reinforcement_learning" title="Multi-agent reinforcement learning">Multi-agent</a> <ul><li><a href="/wiki/Self-play_(reinforcement_learning_technique)" class="mw-redirect" title="Self-play (reinforcement learning technique)">Self-play</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Learning with humans</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Active_learning_(machine_learning)" title="Active learning (machine learning)">Active learning</a></li> <li><a href="/wiki/Crowdsourcing" title="Crowdsourcing">Crowdsourcing</a></li> <li><a href="/wiki/Human-in-the-loop" title="Human-in-the-loop">Human-in-the-loop</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Model diagnostics</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Coefficient_of_determination" title="Coefficient of determination">Coefficient of determination</a></li> <li><a href="/wiki/Confusion_matrix" title="Confusion matrix">Confusion matrix</a></li> <li><a href="/wiki/Learning_curve_(machine_learning)" title="Learning curve (machine learning)">Learning curve</a></li> <li><a href="/wiki/Receiver_operating_characteristic" title="Receiver operating characteristic">ROC curve</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Mathematical foundations</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Kernel_machines" class="mw-redirect" title="Kernel machines">Kernel machines</a></li> <li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Computational_learning_theory" title="Computational learning theory">Computational learning theory</a></li> <li><a href="/wiki/Empirical_risk_minimization" title="Empirical risk minimization">Empirical risk minimization</a></li> <li><a href="/wiki/Occam_learning" title="Occam learning">Occam learning</a></li> <li><a href="/wiki/Probably_approximately_correct_learning" title="Probably approximately correct learning">PAC learning</a></li> <li><a href="/wiki/Statistical_learning_theory" title="Statistical learning theory">Statistical learning</a></li> <li><a href="/wiki/Vapnik%E2%80%93Chervonenkis_theory" title="Vapnik–Chervonenkis theory">VC theory</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Journals and conferences</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/ECML_PKDD" title="ECML PKDD">ECML PKDD</a></li> <li><a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural Information Processing Systems">NeurIPS</a></li> <li><a href="/wiki/International_Conference_on_Machine_Learning" title="International Conference on Machine Learning">ICML</a></li> <li><a href="/wiki/International_Conference_on_Learning_Representations" title="International Conference on Learning Representations">ICLR</a></li> <li><a href="/wiki/International_Joint_Conference_on_Artificial_Intelligence" title="International Joint Conference on Artificial Intelligence">IJCAI</a></li> <li><a href="/wiki/Machine_Learning_(journal)" title="Machine Learning (journal)">ML</a></li> <li><a href="/wiki/Journal_of_Machine_Learning_Research" title="Journal of Machine Learning Research">JMLR</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Related articles</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Glossary_of_artificial_intelligence" title="Glossary of artificial intelligence">Glossary of artificial intelligence</a></li> <li><a href="/wiki/List_of_datasets_for_machine-learning_research" title="List of datasets for machine-learning research">List of datasets for machine-learning research</a> <ul><li><a href="/wiki/List_of_datasets_in_computer_vision_and_image_processing" title="List of datasets in computer vision and image processing">List of datasets in computer vision and image processing</a></li></ul></li> <li><a href="/wiki/Outline_of_machine_learning" title="Outline of machine learning">Outline of machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-navbar"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Machine_learning" title="Template:Machine learning"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Machine_learning" title="Template talk:Machine learning"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Machine_learning" title="Special:EditPage/Template:Machine learning"><abbr title="Edit this template">e</abbr></a></li></ul></div></td></tr></tbody></table> <p>In <a href="/wiki/Computer_science" title="Computer science">computer science</a>, <b>online machine learning</b> is a method of <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a> in which data becomes available in a sequential order and is used to update the best predictor for future data at each step, as opposed to batch learning techniques which generate the best predictor by learning on the entire <a href="/wiki/Training_data_set" class="mw-redirect" title="Training data set">training data set</a> at once. Online learning is a common technique used in areas of machine learning where it is computationally infeasible to train over the entire dataset, requiring the need of <a href="/wiki/Out-of-core" class="mw-redirect" title="Out-of-core">out-of-core</a> algorithms. It is also used in situations where it is necessary for the algorithm to dynamically adapt to new patterns in the data, or when the data itself is generated as a function of time, e.g., <a href="/wiki/Stock_market_prediction" title="Stock market prediction">stock price prediction</a>. Online learning algorithms may be prone to <a href="/wiki/Catastrophic_interference" title="Catastrophic interference">catastrophic interference</a>, a problem that can be addressed by <a href="/wiki/Incremental_learning" title="Incremental learning">incremental learning</a> approaches. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Introduction">Introduction</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=1" title="Edit section: Introduction"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In the setting of <a href="/wiki/Supervised_learning" title="Supervised learning">supervised learning</a>, a function of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f:X\to Y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo>:</mo> <mi>X</mi> <mo stretchy="false">→<!-- → --></mo> <mi>Y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f:X\to Y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/abd1e080abef4bbdab67b43819c6431e7561361c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:10.583ex; height:2.509ex;" alt="{\displaystyle f:X\to Y}"></span> is to be learned, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/68baa052181f707c662844a465bfeeb135e82bab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.98ex; height:2.176ex;" alt="{\displaystyle X}"></span> is thought of as a space of inputs and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle Y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>Y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle Y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/961d67d6b454b4df2301ac571808a3538b3a6d3f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.171ex; width:1.773ex; height:2.009ex;" alt="{\displaystyle Y}"></span> as a space of outputs, that predicts well on instances that are drawn from a <a href="/wiki/Joint_probability_distribution" title="Joint probability distribution">joint probability distribution</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle p(x,y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>p</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle p(x,y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/089e91a1824e14cebc8e8d04dc652c61b3008e0a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; margin-left: -0.089ex; width:6.587ex; height:2.843ex;" alt="{\displaystyle p(x,y)}"></span> on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X\times Y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> <mo>×<!-- × --></mo> <mi>Y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X\times Y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1613c1ff4b6fbfb6c80a8da83e90ad28f0ab3483" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.594ex; height:2.176ex;" alt="{\displaystyle X\times Y}"></span>. In reality, the learner never knows the true distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle p(x,y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>p</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle p(x,y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/089e91a1824e14cebc8e8d04dc652c61b3008e0a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; margin-left: -0.089ex; width:6.587ex; height:2.843ex;" alt="{\displaystyle p(x,y)}"></span> over instances. Instead, the learner usually has access to a <a href="/wiki/Training_set" class="mw-redirect" title="Training set">training set</a> of examples <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (x_{1},y_{1}),\ldots ,(x_{n},y_{n})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (x_{1},y_{1}),\ldots ,(x_{n},y_{n})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b3459c38f5205bf3b65166ca15cdde5574997413" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:20.348ex; height:2.843ex;" alt="{\displaystyle (x_{1},y_{1}),\ldots ,(x_{n},y_{n})}"></span>. In this setting, the <a href="/wiki/Loss_function" title="Loss function">loss function</a> is given as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V:Y\times Y\to \mathbb {R} }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo>:</mo> <mi>Y</mi> <mo>×<!-- × --></mo> <mi>Y</mi> <mo stretchy="false">→<!-- → --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V:Y\times Y\to \mathbb {R} }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/939f48aa832d9e4bf64c9e65199d8c56098c4caf" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:15.404ex; height:2.176ex;" alt="{\displaystyle V:Y\times Y\to \mathbb {R} }"></span>, such that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V(f(x),y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V(f(x),y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2f381ec03a204bc068d396f8778f61925459fc01" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:10.204ex; height:2.843ex;" alt="{\displaystyle V(f(x),y)}"></span> measures the difference between the predicted value <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/202945cce41ecebb6f643f31d119c514bec7a074" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.418ex; height:2.843ex;" alt="{\displaystyle f(x)}"></span> and the true value <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b8a6208ec717213d4317e666f1ae872e00620a0d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.155ex; height:2.009ex;" alt="{\displaystyle y}"></span>. The ideal goal is to select a function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f\in {\mathcal {H}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo>∈<!-- ∈ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f\in {\mathcal {H}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2634be9ba2369b9d92861b6d574a471eae58ff81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.083ex; height:2.509ex;" alt="{\displaystyle f\in {\mathcal {H}}}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\mathcal {H}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\mathcal {H}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/19ef4c7b923a5125ac91aa491838a95ee15b804f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.964ex; height:2.176ex;" alt="{\displaystyle {\mathcal {H}}}"></span> is a space of functions called a hypothesis space, so that some notion of total loss is minimized. Depending on the type of model (statistical or adversarial), one can devise different notions of loss, which lead to different learning algorithms. </p> <div class="mw-heading mw-heading2"><h2 id="Statistical_view_of_online_learning">Statistical view of online learning</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=2" title="Edit section: Statistical view of online learning"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In statistical learning models, the training sample <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (x_{i},y_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (x_{i},y_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d6dbb919b91ccacf17ed47898048428a1baf9703" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.912ex; height:2.843ex;" alt="{\displaystyle (x_{i},y_{i})}"></span> are assumed to have been drawn from the true distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle p(x,y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>p</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle p(x,y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/089e91a1824e14cebc8e8d04dc652c61b3008e0a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; margin-left: -0.089ex; width:6.587ex; height:2.843ex;" alt="{\displaystyle p(x,y)}"></span> and the objective is to minimize the expected "risk" <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I[f]=\mathbb {E} [V(f(x),y)]=\int V(f(x),y)\,dp(x,y)\ .}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>I</mi> <mo stretchy="false">[</mo> <mi>f</mi> <mo stretchy="false">]</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">E</mi> </mrow> <mo stretchy="false">[</mo> <mi>V</mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> <mo stretchy="false">]</mo> <mo>=</mo> <mo>∫<!-- ∫ --></mo> <mi>V</mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> <mspace width="thinmathspace" /> <mi>d</mi> <mi>p</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> <mtext> </mtext> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I[f]=\mathbb {E} [V(f(x),y)]=\int V(f(x),y)\,dp(x,y)\ .}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/237446394be1d6f2da9eec0e73de114a485687d7" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:45.101ex; height:5.676ex;" alt="{\displaystyle I[f]=\mathbb {E} [V(f(x),y)]=\int V(f(x),y)\,dp(x,y)\ .}"></span> A common paradigm in this situation is to estimate a function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\hat {f}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mover> <mi>f</mi> <mo stretchy="false">^<!-- ^ --></mo> </mover> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\hat {f}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/14ce989fd75da938ec6f95a0cdb71037b23a11cb" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.699ex; height:3.176ex;" alt="{\displaystyle {\hat {f}}}"></span> through <a href="/wiki/Empirical_risk_minimization" title="Empirical risk minimization">empirical risk minimization</a> or regularized empirical risk minimization (usually <a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a>). The choice of loss function here gives rise to several well-known learning algorithms such as regularized <a href="/wiki/Least_squares" title="Least squares">least squares</a> and <a href="/wiki/Support_vector_machines" class="mw-redirect" title="Support vector machines">support vector machines</a>. A purely online model in this category would learn based on just the new input <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (x_{t+1},y_{t+1})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (x_{t+1},y_{t+1})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/deb5ac4033c4db1921ba4b12d1fdc701f0d75831" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.165ex; height:2.843ex;" alt="{\displaystyle (x_{t+1},y_{t+1})}"></span>, the current best predictor <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/874c306411e808e8191e8aeb95e3440e1c68d6e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.509ex;" alt="{\displaystyle f_{t}}"></span> and some extra stored information (which is usually expected to have storage requirements independent of training data size). For many formulations, for example nonlinear <a href="/wiki/Kernel_methods" class="mw-redirect" title="Kernel methods">kernel methods</a>, true online learning is not possible, though a form of hybrid online learning with recursive algorithms can be used where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t+1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t+1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/393d15e53d6203ef7335a3ed55cfa5d418db4294" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:4.066ex; height:2.509ex;" alt="{\displaystyle f_{t+1}}"></span> is permitted to depend on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/874c306411e808e8191e8aeb95e3440e1c68d6e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.509ex;" alt="{\displaystyle f_{t}}"></span> and all previous data points <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (x_{1},y_{1}),\ldots ,(x_{t},y_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (x_{1},y_{1}),\ldots ,(x_{t},y_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fb94c7629caf4ee2868da17d639d32188feb3597" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:19.563ex; height:2.843ex;" alt="{\displaystyle (x_{1},y_{1}),\ldots ,(x_{t},y_{t})}"></span>. In this case, the space requirements are no longer guaranteed to be constant since it requires storing all previous data points, but the solution may take less time to compute with the addition of a new data point, as compared to batch learning techniques. </p><p>A common strategy to overcome the above issues is to learn using mini-batches, which process a small batch of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle b\geq 1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>b</mi> <mo>≥<!-- ≥ --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle b\geq 1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2ca25b8344866ed3caaddc966d0bcbfb003ade6c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:5.258ex; height:2.343ex;" alt="{\displaystyle b\geq 1}"></span> data points at a time, this can be considered as pseudo-online learning for <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle b}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>b</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle b}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f11423fbb2e967f986e36804a8ae4271734917c3" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.998ex; height:2.176ex;" alt="{\displaystyle b}"></span> much smaller than the total number of training points. Mini-batch techniques are used with repeated passing over the training data to obtain optimized <a href="/wiki/Out-of-core" class="mw-redirect" title="Out-of-core">out-of-core</a> versions of machine learning algorithms, for example, <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">stochastic gradient descent</a>. When combined with <a href="/wiki/Backpropagation" title="Backpropagation">backpropagation</a>, this is currently the de facto training method for training <a href="/wiki/Artificial_neural_networks" class="mw-redirect" title="Artificial neural networks">artificial neural networks</a>. </p> <div class="mw-heading mw-heading3"><h3 id="Example:_linear_least_squares">Example: linear least squares</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=3" title="Edit section: Example: linear least squares"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/Linear_least_squares_(mathematics)" class="mw-redirect" title="Linear least squares (mathematics)">Linear least squares (mathematics)</a></div> <p>The simple example of linear least squares is used to explain a variety of ideas in online learning. The ideas are general enough to be applied to other settings, for example, with other convex loss functions. </p> <div class="mw-heading mw-heading3"><h3 id="Batch_learning">Batch learning</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=4" title="Edit section: Batch learning"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Consider the setting of supervised learning with <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}"></span> being a linear function to be learned: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f(x_{j})=\langle w,x_{j}\rangle =w\cdot x_{j}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>=</mo> <mi>w</mi> <mo>⋅<!-- ⋅ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f(x_{j})=\langle w,x_{j}\rangle =w\cdot x_{j}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1e6dd69280d9eb6ffd771da65be5a2126c99a2ad" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:23.854ex; height:3.009ex;" alt="{\displaystyle f(x_{j})=\langle w,x_{j}\rangle =w\cdot x_{j}}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{j}\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{j}\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8e18fbfc87dc1f77e3578022c609e2918516c972" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:7.85ex; height:3.343ex;" alt="{\displaystyle x_{j}\in \mathbb {R} ^{d}}"></span> is a vector of inputs (data points) and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1a4ebc4af2c511658b3e378e73b257436dc61e2d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.275ex; height:2.676ex;" alt="{\displaystyle w\in \mathbb {R} ^{d}}"></span> is a linear filter vector. The goal is to compute the filter vector <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>. To this end, a square loss function <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V(f(x_{j}),y_{j})=(f(x_{j})-y_{j})^{2}=(\langle w,x_{j}\rangle -y_{j})^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo>=</mo> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V(f(x_{j}),y_{j})=(f(x_{j})-y_{j})^{2}=(\langle w,x_{j}\rangle -y_{j})^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bcc76b1426014902f82424376228f16617d2f1f3" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:45.783ex; height:3.343ex;" alt="{\displaystyle V(f(x_{j}),y_{j})=(f(x_{j})-y_{j})^{2}=(\langle w,x_{j}\rangle -y_{j})^{2}}"></span> is used to compute the vector <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> that minimizes the empirical loss <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I_{n}[w]=\sum _{j=1}^{n}V(\langle w,x_{j}\rangle ,y_{j})=\sum _{j=1}^{n}(x_{j}^{\mathsf {T}}w-y_{j})^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mi>V</mi> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mo stretchy="false">(</mo> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <mi>w</mi> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I_{n}[w]=\sum _{j=1}^{n}V(\langle w,x_{j}\rangle ,y_{j})=\sum _{j=1}^{n}(x_{j}^{\mathsf {T}}w-y_{j})^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/915d00935aae63eb1179efec0b3a08330e296f50" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:44.018ex; height:7.176ex;" alt="{\displaystyle I_{n}[w]=\sum _{j=1}^{n}V(\langle w,x_{j}\rangle ,y_{j})=\sum _{j=1}^{n}(x_{j}^{\mathsf {T}}w-y_{j})^{2}}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y_{j}\in \mathbb {R} .}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y_{j}\in \mathbb {R} .}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/61ce684e989b65ab7dd7076f00185bce3b1c54a6" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:7.215ex; height:2.843ex;" alt="{\displaystyle y_{j}\in \mathbb {R} .}"></span> </p><p>Let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/68baa052181f707c662844a465bfeeb135e82bab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.98ex; height:2.176ex;" alt="{\displaystyle X}"></span> be the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i\times d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> <mo>×<!-- × --></mo> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i\times d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/66fe523de06a09ecddf014e17f0eb5c2788c1fa8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:4.859ex; height:2.176ex;" alt="{\displaystyle i\times d}"></span> data matrix and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y\in \mathbb {R} ^{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>y</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y\in \mathbb {R} ^{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/89d452dd1da1223b1b000784a7bd31921e12a19e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.474ex; height:3.009ex;" alt="{\displaystyle y\in \mathbb {R} ^{i}}"></span> is the column vector of target values after the arrival of the first <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span> data points. Assuming that the covariance matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Sigma _{i}=X^{\mathsf {T}}X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Sigma _{i}=X^{\mathsf {T}}X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f90fee600100c929ece31a6e7ee26db9826c6ba2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:10.905ex; height:3.009ex;" alt="{\displaystyle \Sigma _{i}=X^{\mathsf {T}}X}"></span> is invertible (otherwise it is preferential to proceed in a similar fashion with Tikhonov regularization), the best solution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f^{*}(x)=\langle w^{*},x\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msup> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msup> <mo>,</mo> <mi>x</mi> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f^{*}(x)=\langle w^{*},x\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cf939613e9bcbee271dae3e0472b9acd150bd637" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:15.503ex; height:2.843ex;" alt="{\displaystyle f^{*}(x)=\langle w^{*},x\rangle }"></span> to the linear least squares problem is given by <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w^{*}=(X^{\mathsf {T}}X)^{-1}X^{\mathsf {T}}y=\Sigma _{i}^{-1}\sum _{j=1}^{i}x_{j}y_{j}.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msup> <mo>=</mo> <mo stretchy="false">(</mo> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>y</mi> <mo>=</mo> <msubsup> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msubsup> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </munderover> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w^{*}=(X^{\mathsf {T}}X)^{-1}X^{\mathsf {T}}y=\Sigma _{i}^{-1}\sum _{j=1}^{i}x_{j}y_{j}.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bbdbf8fdc6fdafc07ddeb78f08b1bb46e1a5d933" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:35.965ex; height:7.509ex;" alt="{\displaystyle w^{*}=(X^{\mathsf {T}}X)^{-1}X^{\mathsf {T}}y=\Sigma _{i}^{-1}\sum _{j=1}^{i}x_{j}y_{j}.}"></span> </p><p>Now, calculating the covariance matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Sigma _{i}=\sum _{j=1}^{i}x_{j}x_{j}^{\mathsf {T}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </munderover> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Sigma _{i}=\sum _{j=1}^{i}x_{j}x_{j}^{\mathsf {T}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/788eccbd77d01a6f397ad02ab7fb4e0b54f296b6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:14.239ex; height:7.509ex;" alt="{\displaystyle \Sigma _{i}=\sum _{j=1}^{i}x_{j}x_{j}^{\mathsf {T}}}"></span> takes time <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(id^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>i</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(id^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5ea2c75ae97c0821ace838351d558b113709eb85" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.657ex; height:3.176ex;" alt="{\displaystyle O(id^{2})}"></span>, inverting the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d\times d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>d</mi> <mo>×<!-- × --></mo> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d\times d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8832c0aa14719499bb50f640be353abb7f37f069" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.272ex; height:2.176ex;" alt="{\displaystyle d\times d}"></span> matrix takes time <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d688ed8415cdd3e1f53b706d3da73f69c0ea0144" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.855ex; height:3.176ex;" alt="{\displaystyle O(d^{3})}"></span>, while the rest of the multiplication takes time <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b184eaa03aff77b01bc4c0681501df7505dd4292" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.855ex; height:3.176ex;" alt="{\displaystyle O(d^{2})}"></span>, giving a total time of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(id^{2}+d^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>i</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo>+</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(id^{2}+d^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5dfe9793032810d52d4facb100516321f2dfcbb5" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.77ex; height:3.176ex;" alt="{\displaystyle O(id^{2}+d^{3})}"></span>. When there are <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> total points in the dataset, to recompute the solution after the arrival of every datapoint <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i=1,\ldots ,n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> <mo>=</mo> <mn>1</mn> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i=1,\ldots ,n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a5726d00b79af1b4666a6319c45381579dc85a9a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:11.636ex; height:2.509ex;" alt="{\displaystyle i=1,\ldots ,n}"></span>, the naive approach will have a total complexity <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(n^{2}d^{2}+nd^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo>+</mo> <mi>n</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(n^{2}d^{2}+nd^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e1b728ed308010eca3bd02442315bafda0518af8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:14.811ex; height:3.176ex;" alt="{\displaystyle O(n^{2}d^{2}+nd^{3})}"></span>. Note that when storing the matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Sigma _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Sigma _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7311e26a182cf249a67308c6ceb4d12d2c6896b8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.478ex; height:2.509ex;" alt="{\displaystyle \Sigma _{i}}"></span>, then updating it at each step needs only adding <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{i+1}x_{i+1}^{\mathsf {T}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>+</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{i+1}x_{i+1}^{\mathsf {T}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f368180e4a9b2e950cb40ad055ea0eb508f181e4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.171ex; width:8.46ex; height:3.343ex;" alt="{\displaystyle x_{i+1}x_{i+1}^{\mathsf {T}}}"></span>, which takes <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b184eaa03aff77b01bc4c0681501df7505dd4292" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.855ex; height:3.176ex;" alt="{\displaystyle O(d^{2})}"></span> time, reducing the total time to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(nd^{2}+nd^{3})=O(nd^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo>+</mo> <mi>n</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> <mo>=</mo> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(nd^{2}+nd^{3})=O(nd^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7826cbfd4595a6c5b92b5dbd47f70ae6eeb7d10d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:24.105ex; height:3.176ex;" alt="{\displaystyle O(nd^{2}+nd^{3})=O(nd^{3})}"></span>, but with an additional storage space of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b184eaa03aff77b01bc4c0681501df7505dd4292" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.855ex; height:3.176ex;" alt="{\displaystyle O(d^{2})}"></span> to store <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Sigma _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Sigma _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7311e26a182cf249a67308c6ceb4d12d2c6896b8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.478ex; height:2.509ex;" alt="{\displaystyle \Sigma _{i}}"></span>.<sup id="cite_ref-lorenzo_1-0" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Online_learning:_recursive_least_squares">Online learning: recursive least squares</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=5" title="Edit section: Online learning: recursive least squares"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The recursive least squares (RLS) algorithm considers an online approach to the least squares problem. It can be shown that by initialising <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \textstyle w_{0}=0\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mstyle displaystyle="false" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mn>0</mn> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \textstyle w_{0}=0\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/705559eea79df0dda6c0b92aec5fb6b470656364" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:12.59ex; height:3.009ex;" alt="{\displaystyle \textstyle w_{0}=0\in \mathbb {R} ^{d}}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \textstyle \Gamma _{0}=I\in \mathbb {R} ^{d\times d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mstyle displaystyle="false" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mi>I</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> <mo>×<!-- × --></mo> <mi>d</mi> </mrow> </msup> </mstyle> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \textstyle \Gamma _{0}=I\in \mathbb {R} ^{d\times d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0204688743496375136edeb9e9ffaf1b1af5c199" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:14.526ex; height:3.009ex;" alt="{\displaystyle \textstyle \Gamma _{0}=I\in \mathbb {R} ^{d\times d}}"></span>, the solution of the linear least squares problem given in the previous section can be computed by the following iteration: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{i}=\Gamma _{i-1}-{\frac {\Gamma _{i-1}x_{i}x_{i}^{\mathsf {T}}\Gamma _{i-1}}{1+x_{i}^{\mathsf {T}}\Gamma _{i-1}x_{i}}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> </mrow> <mrow> <mn>1</mn> <mo>+</mo> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{i}=\Gamma _{i-1}-{\frac {\Gamma _{i-1}x_{i}x_{i}^{\mathsf {T}}\Gamma _{i-1}}{1+x_{i}^{\mathsf {T}}\Gamma _{i-1}x_{i}}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a66dd23a223141ad6f7108422e6ace6874e15f75" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:26.897ex; height:7.176ex;" alt="{\displaystyle \Gamma _{i}=\Gamma _{i-1}-{\frac {\Gamma _{i-1}x_{i}x_{i}^{\mathsf {T}}\Gamma _{i-1}}{1+x_{i}^{\mathsf {T}}\Gamma _{i-1}x_{i}}}}"></span> <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow> <mo>(</mo> <mrow> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> <mo>)</mo> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6d2839e80235a430508cf84e988181949bb3aa50" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:31.891ex; height:3.176ex;" alt="{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}"></span> The above iteration algorithm can be proved using induction on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span>.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> The proof also shows that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{i}=\Sigma _{i}^{-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msubsup> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{i}=\Sigma _{i}^{-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7ff55e32e980ae75dfc31b4b0a5f248b297889f8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:9.362ex; height:3.343ex;" alt="{\displaystyle \Gamma _{i}=\Sigma _{i}^{-1}}"></span>. One can look at RLS also in the context of adaptive filters (see <a href="/wiki/Recursive_least_squares" class="mw-redirect" title="Recursive least squares">RLS</a>). </p><p>The complexity for <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> steps of this algorithm is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(nd^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(nd^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6ad3dd0e39bb135e61123a5551b34e8ea33cab1e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.25ex; height:3.176ex;" alt="{\displaystyle O(nd^{2})}"></span>, which is an order of magnitude faster than the corresponding batch learning complexity. The storage requirements at every step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span> here are to store the matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4497c779a32aa954cc5706fb7f4c2abbe1176dec" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.252ex; height:2.509ex;" alt="{\displaystyle \Gamma _{i}}"></span>, which is constant at <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>d</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b184eaa03aff77b01bc4c0681501df7505dd4292" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.855ex; height:3.176ex;" alt="{\displaystyle O(d^{2})}"></span>. For the case when <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Sigma _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Sigma _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7311e26a182cf249a67308c6ceb4d12d2c6896b8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.478ex; height:2.509ex;" alt="{\displaystyle \Sigma _{i}}"></span> is not invertible, consider the regularised version of the problem loss function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sum _{j=1}^{n}\left(x_{j}^{\mathsf {T}}w-y_{j}\right)^{2}+\lambda \left\|w\right\|_{2}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msup> <mrow> <mo>(</mo> <mrow> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <mi>w</mi> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sum _{j=1}^{n}\left(x_{j}^{\mathsf {T}}w-y_{j}\right)^{2}+\lambda \left\|w\right\|_{2}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7b85f051f2ef8f7697f99c2ecc2d77d625f08c77" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:26.045ex; height:7.176ex;" alt="{\displaystyle \sum _{j=1}^{n}\left(x_{j}^{\mathsf {T}}w-y_{j}\right)^{2}+\lambda \left\|w\right\|_{2}^{2}}"></span>. Then, it's easy to show that the same algorithm works with <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{0}=(I+\lambda I)^{-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mo stretchy="false">(</mo> <mi>I</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>I</mi> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{0}=(I+\lambda I)^{-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d0439b876beea27010d8d030f1168466bf4fef11" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:16.287ex; height:3.176ex;" alt="{\displaystyle \Gamma _{0}=(I+\lambda I)^{-1}}"></span>, and the iterations proceed to give <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{i}=(\Sigma _{i}+\lambda I)^{-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <mo stretchy="false">(</mo> <msub> <mi mathvariant="normal">Σ<!-- Σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>I</mi> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{i}=(\Sigma _{i}+\lambda I)^{-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b125856ff14daab5f793c6121c952e2c64a146f9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.338ex; height:3.176ex;" alt="{\displaystyle \Gamma _{i}=(\Sigma _{i}+\lambda I)^{-1}}"></span>.<sup id="cite_ref-lorenzo_1-1" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Stochastic_gradient_descent">Stochastic gradient descent</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=6" title="Edit section: Stochastic gradient descent"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">Stochastic gradient descent</a></div> <p>When this <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow> <mo>(</mo> <mrow> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> <mo>)</mo> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6d2839e80235a430508cf84e988181949bb3aa50" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:31.891ex; height:3.176ex;" alt="{\displaystyle w_{i}=w_{i-1}-\Gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)}"></span> is replaced by <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}=w_{i-1}-\gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{i}\rangle ,y_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow> <mo>(</mo> <mrow> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> <mo>)</mo> </mrow> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mi mathvariant="normal">∇<!-- ∇ --></mi> <mi>V</mi> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}=w_{i-1}-\gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{i}\rangle ,y_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9ffc3c64dc7cac17b19e6764c8baf23090da3a4e" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:62.191ex; height:3.176ex;" alt="{\displaystyle w_{i}=w_{i-1}-\gamma _{i}x_{i}\left(x_{i}^{\mathsf {T}}w_{i-1}-y_{i}\right)=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{i}\rangle ,y_{i})}"></span> or <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Gamma _{i}\in \mathbb {R} ^{d\times d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Γ<!-- Γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> <mo>×<!-- × --></mo> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Gamma _{i}\in \mathbb {R} ^{d\times d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e2366c73e880e6b05c3f0bc25e208465c0ca9536" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:10.001ex; height:3.009ex;" alt="{\displaystyle \Gamma _{i}\in \mathbb {R} ^{d\times d}}"></span> by <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \gamma _{i}\in \mathbb {R} }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \gamma _{i}\in \mathbb {R} }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cd887ef9a6242fbad4174fb2ea5bd0dd34df8f18" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.523ex; height:2.676ex;" alt="{\displaystyle \gamma _{i}\in \mathbb {R} }"></span>, this becomes the stochastic gradient descent algorithm. In this case, the complexity for <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> steps of this algorithm reduces to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(nd)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <mi>d</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(nd)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d627081a425c8784d1e2f4da283e073fd960e881" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.193ex; height:2.843ex;" alt="{\displaystyle O(nd)}"></span>. The storage requirements at every step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span> are constant at <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(d)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>d</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(d)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6e323ee705f0664132bf796619cf0e2b36a1c396" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.798ex; height:2.843ex;" alt="{\displaystyle O(d)}"></span>. </p><p>However, the stepsize <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \gamma _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \gamma _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/907e4a28946d45e5cbf3cf3c6c48de68296039c4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:2.004ex; height:2.176ex;" alt="{\displaystyle \gamma _{i}}"></span> needs to be chosen carefully to solve the expected risk minimization problem, as detailed above. By choosing a decaying step size <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \gamma _{i}\approx {\frac {1}{\sqrt {i}}},}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>≈<!-- ≈ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <msqrt> <mi>i</mi> </msqrt> </mfrac> </mrow> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \gamma _{i}\approx {\frac {1}{\sqrt {i}}},}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cefcfa8e97ed0c7352edacaeb4b05affe02bbb75" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.838ex; width:9.324ex; height:6.176ex;" alt="{\displaystyle \gamma _{i}\approx {\frac {1}{\sqrt {i}}},}"></span> one can prove the convergence of the average iterate <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle {\overline {w}}_{n}={\frac {1}{n}}\sum _{i=1}^{n}w_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <msub> <mrow class="MJX-TeXAtom-ORD"> <mover> <mi>w</mi> <mo accent="false">¯<!-- ¯ --></mo> </mover> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle {\overline {w}}_{n}={\frac {1}{n}}\sum _{i=1}^{n}w_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7df1d57229bf6db929c5b55268517604405cf7ac" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:16.51ex; height:3.343ex;" alt="{\textstyle {\overline {w}}_{n}={\frac {1}{n}}\sum _{i=1}^{n}w_{i}}"></span>. This setting is a special case of <a href="/wiki/Stochastic_optimization" title="Stochastic optimization">stochastic optimization</a>, a well known problem in optimization.<sup id="cite_ref-lorenzo_1-2" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Incremental_stochastic_gradient_descent">Incremental stochastic gradient descent</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=7" title="Edit section: Incremental stochastic gradient descent"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In practice, one can perform multiple stochastic gradient passes (also called cycles or epochs) over the data. The algorithm thus obtained is called incremental gradient method and corresponds to an iteration <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{t_{i}}\rangle ,y_{t_{i}})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mi mathvariant="normal">∇<!-- ∇ --></mi> <mi>V</mi> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <msub> <mi>t</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <msub> <mi>t</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{t_{i}}\rangle ,y_{t_{i}})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fae97f0e3ca7e431873a9de0ea6d6937c2a1a49f" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:34.316ex; height:3.009ex;" alt="{\displaystyle w_{i}=w_{i-1}-\gamma _{i}\nabla V(\langle w_{i-1},x_{t_{i}}\rangle ,y_{t_{i}})}"></span> The main difference with the stochastic gradient method is that here a sequence <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>t</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8b61e3d4d909be4a19c9a554a301684232f59e5a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.639ex; height:2.343ex;" alt="{\displaystyle t_{i}}"></span> is chosen to decide which training point is visited in the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span>-th step. Such a sequence can be stochastic or deterministic. The number of iterations is then decoupled to the number of points (each point can be considered more than once). The incremental gradient method can be shown to provide a minimizer to the empirical risk.<sup id="cite_ref-bertsekas_3-0" class="reference"><a href="#cite_note-bertsekas-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> Incremental techniques can be advantageous when considering objective functions made up of a sum of many terms e.g. an empirical error corresponding to a very large dataset.<sup id="cite_ref-lorenzo_1-3" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Kernel_methods">Kernel methods</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=8" title="Edit section: Kernel methods"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">See also: <a href="/wiki/Kernel_method" title="Kernel method">Kernel method</a></div> <p>Kernels can be used to extend the above algorithms to non-parametric models (or models where the parameters form an infinite dimensional space). The corresponding procedure will no longer be truly online and instead involve storing all the data points, but is still faster than the brute force method. This discussion is restricted to the case of the square loss, though it can be extended to any convex loss. It can be shown by an easy induction <sup id="cite_ref-lorenzo_1-4" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> that if <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/af4a0955af42beb5f85aa05fb8c07abedc13990d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.724ex; height:2.509ex;" alt="{\displaystyle X_{i}}"></span> is the data matrix and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fe22f0329d3ecb2e1880d44d191aba0e5475db68" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.464ex; height:2.009ex;" alt="{\displaystyle w_{i}}"></span> is the output after <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}"></span> steps of the <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a> algorithm, then, <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{i}=X_{i}^{\mathsf {T}}c_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msubsup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{i}=X_{i}^{\mathsf {T}}c_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/027a4c2403a4b04845a18c749d6ca9bcd59daa16" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:10.717ex; height:3.176ex;" alt="{\displaystyle w_{i}=X_{i}^{\mathsf {T}}c_{i}}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{i}=((c_{i})_{1},(c_{i})_{2},...,(c_{i})_{i})\in \mathbb {R} ^{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <mo stretchy="false">(</mo> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <mo>.</mo> <mo>.</mo> <mo>.</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{i}=((c_{i})_{1},(c_{i})_{2},...,(c_{i})_{i})\in \mathbb {R} ^{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e55b7455466ac2ed15f44b1f726f57f88bd77760" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:31.992ex; height:3.176ex;" alt="{\displaystyle c_{i}=((c_{i})_{1},(c_{i})_{2},...,(c_{i})_{i})\in \mathbb {R} ^{i}}"></span> and the sequence <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/01acb7953ba52c2aa44264b5d0f8fd223aa178a2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.807ex; height:2.009ex;" alt="{\displaystyle c_{i}}"></span> satisfies the recursion: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{0}=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{0}=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/29af3d4e887815bb3b9b9eab4f7540a376fccd73" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.322ex; height:2.509ex;" alt="{\displaystyle c_{0}=0}"></span> <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (c_{i})_{j}=(c_{i-1})_{j},j=1,2,...,i-1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>=</mo> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <mi>j</mi> <mo>=</mo> <mn>1</mn> <mo>,</mo> <mn>2</mn> <mo>,</mo> <mo>.</mo> <mo>.</mo> <mo>.</mo> <mo>,</mo> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (c_{i})_{j}=(c_{i-1})_{j},j=1,2,...,i-1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e4f9a0002c727d6162eaf7e8422526d6ebb8ee49" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:32.674ex; height:3.009ex;" alt="{\displaystyle (c_{i})_{j}=(c_{i-1})_{j},j=1,2,...,i-1}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x_{i}\rangle {\Big )}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mo maxsize="1.623em" minsize="1.623em">(</mo> </mrow> </mrow> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>−<!-- − --></mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mo maxsize="1.623em" minsize="1.623em">)</mo> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x_{i}\rangle {\Big )}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/42e34e123abeee13bd9d0a81aeac12f9211f9005" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:34.266ex; height:7.676ex;" alt="{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x_{i}\rangle {\Big )}}"></span> Notice that here <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \langle x_{j},x_{i}\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \langle x_{j},x_{i}\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3c3ff392933a4e4200440f1955f96374a4cfd8f7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:7.212ex; height:3.009ex;" alt="{\displaystyle \langle x_{j},x_{i}\rangle }"></span> is just the standard Kernel on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a713426956296f1668fce772df3c60b9dde8a685" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.77ex; height:2.676ex;" alt="{\displaystyle \mathbb {R} ^{d}}"></span>, and the predictor is of the form <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{i}(x)=\langle w_{i-1},x\rangle =\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x\rangle .}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <mi>x</mi> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <mi>x</mi> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{i}(x)=\langle w_{i-1},x\rangle =\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x\rangle .}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f03792a33de2b1c9bffca34ed3f9d78849d54ab8" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:37.052ex; height:7.676ex;" alt="{\displaystyle f_{i}(x)=\langle w_{i-1},x\rangle =\sum _{j=1}^{i-1}(c_{i-1})_{j}\langle x_{j},x\rangle .}"></span> </p><p><br /> Now, if a general kernel <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2b76fce82a62ed5461908f0dc8f037de4e3686b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.066ex; height:2.176ex;" alt="{\displaystyle K}"></span> is introduced instead and let the predictor be <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{i}(x)=\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{i}(x)=\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/03e34890f01157953b3c2e4cf9b7d4a2894352fb" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:26.636ex; height:7.676ex;" alt="{\displaystyle f_{i}(x)=\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x)}"></span> then the same proof will also show that predictor minimising the least squares loss is obtained by changing the above recursion to <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x_{i}){\Big )}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mo maxsize="1.623em" minsize="1.623em">(</mo> </mrow> </mrow> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>−<!-- − --></mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mo maxsize="1.623em" minsize="1.623em">)</mo> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x_{i}){\Big )}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86b9e3c39a28c669b174001027cc216197155076" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:36.332ex; height:7.676ex;" alt="{\displaystyle (c_{i})_{i}=\gamma _{i}{\Big (}y_{i}-\sum _{j=1}^{i-1}(c_{i-1})_{j}K(x_{j},x_{i}){\Big )}}"></span> The above expression requires storing all the data for updating <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/01acb7953ba52c2aa44264b5d0f8fd223aa178a2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.807ex; height:2.009ex;" alt="{\displaystyle c_{i}}"></span>. The total time complexity for the recursion when evaluating for the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span>-th datapoint is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(n^{2}dk)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mi>d</mi> <mi>k</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(n^{2}dk)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1e16d08a5cc43315d54e67fdc304b4ac645be891" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:8.459ex; height:3.176ex;" alt="{\displaystyle O(n^{2}dk)}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle k}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>k</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle k}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c3c9a2c7b599b37105512c5d570edc034056dd40" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.211ex; height:2.176ex;" alt="{\displaystyle k}"></span> is the cost of evaluating the kernel on a single pair of points.<sup id="cite_ref-lorenzo_1-5" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> Thus, the use of the kernel has allowed the movement from a finite dimensional parameter space <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \textstyle w_{i}\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mstyle displaystyle="false" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \textstyle w_{i}\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e501c93c63db39cee8f8585f7c791dffa514bf63" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:8.075ex; height:3.009ex;" alt="{\displaystyle \textstyle w_{i}\in \mathbb {R} ^{d}}"></span> to a possibly infinite dimensional feature represented by a kernel <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2b76fce82a62ed5461908f0dc8f037de4e3686b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.066ex; height:2.176ex;" alt="{\displaystyle K}"></span> by instead performing the recursion on the space of parameters <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \textstyle c_{i}\in \mathbb {R} ^{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mstyle displaystyle="false" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msup> </mstyle> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \textstyle c_{i}\in \mathbb {R} ^{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e488a21fc80f8cdacfcfca57c446178bc6bb7785" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:7.125ex; height:3.009ex;" alt="{\displaystyle \textstyle c_{i}\in \mathbb {R} ^{i}}"></span>, whose dimension is the same as the size of the training dataset. In general, this is a consequence of the <a href="/wiki/Representer_theorem" title="Representer theorem">representer theorem</a>.<sup id="cite_ref-lorenzo_1-6" class="reference"><a href="#cite_note-lorenzo-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Online_convex_optimization">Online convex optimization</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=9" title="Edit section: Online convex optimization"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Online convex optimization (OCO) <sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> is a general framework for decision making which leverages <a href="/wiki/Convex_optimization" title="Convex optimization">convex optimization</a> to allow for efficient algorithms. The framework is that of repeated game playing as follows: </p><p>For <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t=1,2,...,T}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> <mo>=</mo> <mn>1</mn> <mo>,</mo> <mn>2</mn> <mo>,</mo> <mo>.</mo> <mo>.</mo> <mo>.</mo> <mo>,</mo> <mi>T</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t=1,2,...,T}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9e782a9b81091d11641be9afacfd9b02a85e3e81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:14.103ex; height:2.509ex;" alt="{\displaystyle t=1,2,...,T}"></span> </p> <ul><li>Learner receives input <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f279a30bc8eabc788f3fe81c9cfb674e72e858db" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.156ex; height:2.009ex;" alt="{\displaystyle x_{t}}"></span></li> <li>Learner outputs <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6cc7b029066adbf68868f39f3dceb58eab2d1a12" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.49ex; height:2.009ex;" alt="{\displaystyle w_{t}}"></span> from a fixed convex set <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4611d85173cd3b508e67077d4a1252c9c05abca2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.499ex; height:2.176ex;" alt="{\displaystyle S}"></span></li> <li>Nature sends back a convex loss function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}:S\rightarrow \mathbb {R} }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>:</mo> <mi>S</mi> <mo stretchy="false">→<!-- → --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}:S\rightarrow \mathbb {R} }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f4aac9a64a3f96504a3b330866a2494dda6d9968" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:10.682ex; height:2.509ex;" alt="{\displaystyle v_{t}:S\rightarrow \mathbb {R} }"></span>.</li> <li>Learner suffers loss <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/576bc9c7780495a7cfe56b9d746665d29d4adcd0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.253ex; height:2.843ex;" alt="{\displaystyle v_{t}(w_{t})}"></span> and updates its model</li></ul> <p>The goal is to minimize <a href="/wiki/Regret_(decision_theory)" title="Regret (decision theory)">regret</a>, or the difference between cumulative loss and the loss of the best fixed point <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle u\in S}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>u</mi> <mo>∈<!-- ∈ --></mo> <mi>S</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle u\in S}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a81bd6308fa73ba1bec537a7b5f20429936b0fa2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.67ex; height:2.176ex;" alt="{\displaystyle u\in S}"></span> in hindsight. As an example, consider the case of online least squares linear regression. Here, the weight vectors come from the convex set <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S=\mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> <mo>=</mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S=\mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b0bf04d6266ea81c5279a23971eacd19af797b2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.368ex; height:2.676ex;" alt="{\displaystyle S=\mathbb {R} ^{d}}"></span>, and nature sends back the convex loss function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w)=(\langle w,x_{t}\rangle -y_{t})^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w)=(\langle w,x_{t}\rangle -y_{t})^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/db2a94478ee70fb26e74740bb1ea6b930b6ce7fb" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:22.858ex; height:3.176ex;" alt="{\displaystyle v_{t}(w)=(\langle w,x_{t}\rangle -y_{t})^{2}}"></span>. Note here that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0fe9554452b93508c9d2479414a45981ecc75a2d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.009ex;" alt="{\displaystyle y_{t}}"></span> is implicitly sent with <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7be17a7e5bec78adf7afc13d266bdd9514963783" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.954ex; height:2.009ex;" alt="{\displaystyle v_{t}}"></span>. </p><p>Some online prediction problems however cannot fit in the framework of OCO. For example, in online classification, the prediction domain and the loss functions are not convex. In such scenarios, two simple techniques for <a href="/wiki/Concavification" title="Concavification">convexification</a> are used: <a href="/wiki/Randomization" title="Randomization">randomisation</a> and surrogate loss functions.<sup class="noprint Inline-Template Template-Fact" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources. (September 2019)">citation needed</span></a></i>]</sup> </p><p>Some simple online convex optimisation algorithms are: </p> <div class="mw-heading mw-heading4"><h4 id="Follow_the_leader_(FTL)"><span id="Follow_the_leader_.28FTL.29"></span>Follow the leader (FTL)</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=10" title="Edit section: Follow the leader (FTL)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The simplest learning rule to try is to select (at the current step) the hypothesis that has the least loss over all past rounds. This algorithm is called Follow the leader, and round <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65658b7b223af9e1acc877d848888ecdb4466560" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.84ex; height:2.009ex;" alt="{\displaystyle t}"></span> is simply given by: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>=</mo> <munder> <mrow class="MJX-TeXAtom-OP"> <mrow class="MJX-TeXAtom-OP MJX-fixedlimits"> <mi mathvariant="normal">a</mi> <mi mathvariant="normal">r</mi> <mi mathvariant="normal">g</mi> <mspace width="thinmathspace" /> <mi mathvariant="normal">m</mi> <mi mathvariant="normal">i</mi> <mi mathvariant="normal">n</mi> </mrow> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <mi>S</mi> </mrow> </munder> <mo>⁡<!-- --></mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9b18fc825bf51f8313e7b27db9d8fabc77fff226" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:22.617ex; height:7.343ex;" alt="{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)}"></span> This method can thus be looked as a <a href="/wiki/Greedy_algorithm" title="Greedy algorithm">greedy algorithm</a>. For the case of online quadratic optimization (where the loss function is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w)=\left\|w-x_{t}\right\|_{2}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>w</mi> <mo>−<!-- − --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w)=\left\|w-x_{t}\right\|_{2}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7d7f6b94c9fc4a5402134def6da0370b4eda043d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:18.565ex; height:3.509ex;" alt="{\displaystyle v_{t}(w)=\left\|w-x_{t}\right\|_{2}^{2}}"></span>), one can show a regret bound that grows as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \log(T)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>log</mi> <mo>⁡<!-- --></mo> <mo stretchy="false">(</mo> <mi>T</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \log(T)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1bee4e727686d591fcbe58960c39d6046ba79e66" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.417ex; height:2.843ex;" alt="{\displaystyle \log(T)}"></span>. However, similar bounds cannot be obtained for the FTL algorithm for other important families of models like online linear optimization. To do so, one modifies FTL by adding regularisation. </p> <div class="mw-heading mw-heading4"><h4 id="Follow_the_regularised_leader_(FTRL)"><span id="Follow_the_regularised_leader_.28FTRL.29"></span>Follow the regularised leader (FTRL)</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=11" title="Edit section: Follow the regularised leader (FTRL)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>This is a natural modification of FTL that is used to stabilise the FTL solutions and obtain better regret bounds. A regularisation function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R:S\to \mathbb {R} }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> <mo>:</mo> <mi>S</mi> <mo stretchy="false">→<!-- → --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R:S\to \mathbb {R} }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c0046789e66ed60ddb949c9f3685f721e79f7f4c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:10.493ex; height:2.176ex;" alt="{\displaystyle R:S\to \mathbb {R} }"></span> is chosen and learning performed in round <span class="texhtml mvar" style="font-style:italic;">t</span> as follows: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)+R(w)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>=</mo> <munder> <mrow class="MJX-TeXAtom-OP"> <mrow class="MJX-TeXAtom-OP MJX-fixedlimits"> <mi mathvariant="normal">a</mi> <mi mathvariant="normal">r</mi> <mi mathvariant="normal">g</mi> <mspace width="thinmathspace" /> <mi mathvariant="normal">m</mi> <mi mathvariant="normal">i</mi> <mi mathvariant="normal">n</mi> </mrow> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <mi>S</mi> </mrow> </munder> <mo>⁡<!-- --></mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>R</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)+R(w)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e6e1a9014eb9f97f3098d18c68152a45b4d72b11" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:30.695ex; height:7.343ex;" alt="{\displaystyle w_{t}=\mathop {\operatorname {arg\,min} } _{w\in S}\sum _{i=1}^{t-1}v_{i}(w)+R(w)}"></span> As a special example, consider the case of online linear optimisation i.e. where nature sends back loss functions of the form <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/be15a7bfc7977ef704a367523ba85eb83607250f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:14.94ex; height:2.843ex;" alt="{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }"></span>. Also, let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S=\mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> <mo>=</mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S=\mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b0bf04d6266ea81c5279a23971eacd19af797b2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.368ex; height:2.676ex;" alt="{\displaystyle S=\mathbb {R} ^{d}}"></span>. Suppose the regularisation function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle R(w)={\frac {1}{2\eta }}\left\|w\right\|_{2}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>R</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mrow> <mn>2</mn> <mi>η<!-- η --></mi> </mrow> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle R(w)={\frac {1}{2\eta }}\left\|w\right\|_{2}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/db568d6b4ca58ec8a5d0dda5257462f045ded185" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.505ex; width:15.864ex; height:4.009ex;" alt="{\textstyle R(w)={\frac {1}{2\eta }}\left\|w\right\|_{2}^{2}}"></span> is chosen for some positive number <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \eta }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>η<!-- η --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \eta }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e4d701857cf5fbec133eebaf94deadf722537f64" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:1.169ex; height:2.176ex;" alt="{\displaystyle \eta }"></span>. Then, one can show that the regret minimising iteration becomes <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t+1}=-\eta \sum _{i=1}^{t}z_{i}=w_{t}-\eta z_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <mo>−<!-- − --></mo> <mi>η<!-- η --></mi> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </munderover> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>−<!-- − --></mo> <mi>η<!-- η --></mi> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t+1}=-\eta \sum _{i=1}^{t}z_{i}=w_{t}-\eta z_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d421801678c57a86cff53a73029e97cbf76a2b8a" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:28.182ex; height:7.176ex;" alt="{\displaystyle w_{t+1}=-\eta \sum _{i=1}^{t}z_{i}=w_{t}-\eta z_{t}}"></span> Note that this can be rewritten as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t+1}=w_{t}-\eta \nabla v_{t}(w_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>−<!-- − --></mo> <mi>η<!-- η --></mi> <mi mathvariant="normal">∇<!-- ∇ --></mi> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t+1}=w_{t}-\eta \nabla v_{t}(w_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/45395b53a2136da7f278c854e7c21cdb65b9e67c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:22.378ex; height:2.843ex;" alt="{\displaystyle w_{t+1}=w_{t}-\eta \nabla v_{t}(w_{t})}"></span>, which looks exactly like online gradient descent. </p><p>If <span class="texhtml mvar" style="font-style:italic;">S</span> is instead some convex subspace of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a713426956296f1668fce772df3c60b9dde8a685" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.77ex; height:2.676ex;" alt="{\displaystyle \mathbb {R} ^{d}}"></span>, <span class="texhtml mvar" style="font-style:italic;">S</span> would need to be projected onto, leading to the modified update rule <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t+1}=\Pi _{S}(-\eta \sum _{i=1}^{t}z_{i})=\Pi _{S}(\eta \theta _{t+1})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi mathvariant="normal">Π<!-- Π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>S</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mo>−<!-- − --></mo> <mi>η<!-- η --></mi> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </munderover> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <msub> <mi mathvariant="normal">Π<!-- Π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>S</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>η<!-- η --></mi> <msub> <mi>θ<!-- θ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t+1}=\Pi _{S}(-\eta \sum _{i=1}^{t}z_{i})=\Pi _{S}(\eta \theta _{t+1})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ca5a1de357e76aaaee1139b530c10753d9a69960" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:34.651ex; height:7.176ex;" alt="{\displaystyle w_{t+1}=\Pi _{S}(-\eta \sum _{i=1}^{t}z_{i})=\Pi _{S}(\eta \theta _{t+1})}"></span> This algorithm is known as lazy projection, as the vector <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \theta _{t+1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>θ<!-- θ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \theta _{t+1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3623a056f7192e02615ab4e408ef4392c27129b7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:4.017ex; height:2.509ex;" alt="{\displaystyle \theta _{t+1}}"></span> accumulates the gradients. It is also known as Nesterov's dual averaging algorithm. In this scenario of linear loss functions and quadratic regularisation, the regret is bounded by <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O({\sqrt {T}})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mrow class="MJX-TeXAtom-ORD"> <msqrt> <mi>T</mi> </msqrt> </mrow> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O({\sqrt {T}})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b08c799ec97aa1891429c5deea1bea86b1c8701e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.155ex; height:3.176ex;" alt="{\displaystyle O({\sqrt {T}})}"></span>, and thus the average regret goes to <span class="texhtml mvar" style="font-style:italic;">0</span> as desired. </p> <div class="mw-heading mw-heading3"><h3 id="Online_subgradient_descent_(OSD)"><span id="Online_subgradient_descent_.28OSD.29"></span>Online subgradient descent (OSD)</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=12" title="Edit section: Online subgradient descent (OSD)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">See also: <a href="/wiki/Subgradient_method" title="Subgradient method">Subgradient method</a></div> <p>The above proved a regret bound for linear loss functions <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/be15a7bfc7977ef704a367523ba85eb83607250f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:14.94ex; height:2.843ex;" alt="{\displaystyle v_{t}(w)=\langle w,z_{t}\rangle }"></span>. To generalise the algorithm to any convex loss function, the <a href="/wiki/Subgradient" class="mw-redirect" title="Subgradient">subgradient</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \partial v_{t}(w_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi mathvariant="normal">∂<!-- ∂ --></mi> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \partial v_{t}(w_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/78863717aca31128f37dc026c157daa289930765" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.571ex; height:2.843ex;" alt="{\displaystyle \partial v_{t}(w_{t})}"></span> of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7be17a7e5bec78adf7afc13d266bdd9514963783" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.954ex; height:2.009ex;" alt="{\displaystyle v_{t}}"></span> is used as a linear approximation to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7be17a7e5bec78adf7afc13d266bdd9514963783" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.954ex; height:2.009ex;" alt="{\displaystyle v_{t}}"></span> near <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6cc7b029066adbf68868f39f3dceb58eab2d1a12" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.49ex; height:2.009ex;" alt="{\displaystyle w_{t}}"></span>, leading to the online subgradient descent algorithm: </p><p>Initialise parameter <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \eta ,w_{1}=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>η<!-- η --></mi> <mo>,</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \eta ,w_{1}=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/46e2f700839d6d66e587c029fbc72c9b8364378b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:9.183ex; height:2.676ex;" alt="{\displaystyle \eta ,w_{1}=0}"></span> </p><p>For <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t=1,2,...,T}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> <mo>=</mo> <mn>1</mn> <mo>,</mo> <mn>2</mn> <mo>,</mo> <mo>.</mo> <mo>.</mo> <mo>.</mo> <mo>,</mo> <mi>T</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t=1,2,...,T}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9e782a9b81091d11641be9afacfd9b02a85e3e81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:14.103ex; height:2.509ex;" alt="{\displaystyle t=1,2,...,T}"></span> </p> <ul><li>Predict using <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6cc7b029066adbf68868f39f3dceb58eab2d1a12" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.49ex; height:2.009ex;" alt="{\displaystyle w_{t}}"></span>, receive <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/874c306411e808e8191e8aeb95e3440e1c68d6e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.509ex;" alt="{\displaystyle f_{t}}"></span> from nature.</li> <li>Choose <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle z_{t}\in \partial v_{t}(w_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <mi mathvariant="normal">∂<!-- ∂ --></mi> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle z_{t}\in \partial v_{t}(w_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bb890df9aba35d93e2f0564af6cec6da3678de41" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:12.319ex; height:2.843ex;" alt="{\displaystyle z_{t}\in \partial v_{t}(w_{t})}"></span></li> <li>If <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S=\mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> <mo>=</mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S=\mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b0bf04d6266ea81c5279a23971eacd19af797b2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.368ex; height:2.676ex;" alt="{\displaystyle S=\mathbb {R} ^{d}}"></span>, update as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t+1}=w_{t}-\eta z_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>−<!-- − --></mo> <mi>η<!-- η --></mi> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t+1}=w_{t}-\eta z_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fbdeb93195963cda0c2afabd10df5b69ddce62b6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:16.096ex; height:2.509ex;" alt="{\displaystyle w_{t+1}=w_{t}-\eta z_{t}}"></span></li> <li>If <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S\subset \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> <mo>⊂<!-- ⊂ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S\subset \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7396d3e00a93e85d9cd724ce29e250b645d3684c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.368ex; height:2.676ex;" alt="{\displaystyle S\subset \mathbb {R} ^{d}}"></span>, project cumulative gradients onto <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4611d85173cd3b508e67077d4a1252c9c05abca2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.499ex; height:2.176ex;" alt="{\displaystyle S}"></span> i.e. <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t+1}=\Pi _{S}(\eta \theta _{t+1}),\theta _{t+1}=\theta _{t}+z_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi mathvariant="normal">Π<!-- Π --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>S</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>η<!-- η --></mi> <msub> <mi>θ<!-- θ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <msub> <mi>θ<!-- θ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi>θ<!-- θ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>z</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t+1}=\Pi _{S}(\eta \theta _{t+1}),\theta _{t+1}=\theta _{t}+z_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4cb83ab441ee54f02cf8beedecaba2d96d2a8cc2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:32.533ex; height:2.843ex;" alt="{\displaystyle w_{t+1}=\Pi _{S}(\eta \theta _{t+1}),\theta _{t+1}=\theta _{t}+z_{t}}"></span></li></ul> <p>One can use the OSD algorithm to derive <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O({\sqrt {T}})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mrow class="MJX-TeXAtom-ORD"> <msqrt> <mi>T</mi> </msqrt> </mrow> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O({\sqrt {T}})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b08c799ec97aa1891429c5deea1bea86b1c8701e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.155ex; height:3.176ex;" alt="{\displaystyle O({\sqrt {T}})}"></span> regret bounds for the online version of <a href="/wiki/Support_vector_machine" title="Support vector machine">SVM's</a> for classification, which use the <a href="/wiki/Hinge_loss" title="Hinge loss">hinge loss</a><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle v_{t}(w)=\max\{0,1-y_{t}(w\cdot x_{t})\}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>v</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo movablelimits="true" form="prefix">max</mo> <mo fence="false" stretchy="false">{</mo> <mn>0</mn> <mo>,</mo> <mn>1</mn> <mo>−<!-- − --></mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>w</mi> <mo>⋅<!-- ⋅ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo fence="false" stretchy="false">}</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle v_{t}(w)=\max\{0,1-y_{t}(w\cdot x_{t})\}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/70230f311468716562e6e8feb5193d5ec59d4743" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:30.649ex; height:2.843ex;" alt="{\displaystyle v_{t}(w)=\max\{0,1-y_{t}(w\cdot x_{t})\}}"></span> </p> <div class="mw-heading mw-heading3"><h3 id="Other_algorithms">Other algorithms</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=13" title="Edit section: Other algorithms"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Quadratically regularised FTRL algorithms lead to lazily projected gradient algorithms as described above. To use the above for arbitrary convex functions and regularisers, one uses <a href="/wiki/Online_mirror_descent" class="mw-redirect" title="Online mirror descent">online mirror descent</a>. The optimal regularization in hindsight can be derived for linear loss functions, this leads to the <a href="/wiki/AdaGrad" class="mw-redirect" title="AdaGrad">AdaGrad</a> algorithm. For the Euclidean regularisation, one can show a regret bound of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O({\sqrt {T}})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mrow class="MJX-TeXAtom-ORD"> <msqrt> <mi>T</mi> </msqrt> </mrow> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O({\sqrt {T}})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b08c799ec97aa1891429c5deea1bea86b1c8701e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.155ex; height:3.176ex;" alt="{\displaystyle O({\sqrt {T}})}"></span>, which can be improved further to a <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(\log T)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>log</mi> <mo>⁡<!-- --></mo> <mi>T</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(\log T)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/060927e569d07c3fa5fc077114289c62f0d43bd3" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:8.578ex; height:2.843ex;" alt="{\displaystyle O(\log T)}"></span> for strongly convex and exp-concave loss functions. </p> <div class="mw-heading mw-heading2"><h2 id="Continual_learning">Continual learning</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=14" title="Edit section: Continual learning"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><a href="/wiki/Continual_learning" class="mw-redirect" title="Continual learning">Continual learning</a> means constantly improving the learned model by processing continuous streams of information.<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> Continual learning capabilities are essential for software systems and autonomous agents interacting in an ever changing real world. However, continual learning is a challenge for machine learning and neural network models since the continual acquisition of incrementally available information from non-stationary data distributions generally leads to <a href="/wiki/Catastrophic_forgetting" class="mw-redirect" title="Catastrophic forgetting">catastrophic forgetting</a>. </p> <div class="mw-heading mw-heading2"><h2 id="Interpretations_of_online_learning">Interpretations of online learning</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=15" title="Edit section: Interpretations of online learning"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The paradigm of online learning has different interpretations depending on the choice of the learning model, each of which has distinct implications about the predictive quality of the sequence of functions <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{1},f_{2},\ldots ,f_{n}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <mo>…<!-- … --></mo> <mo>,</mo> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{1},f_{2},\ldots ,f_{n}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9fe58abcecb7415dea502115e7db74734f145584" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:12.957ex; height:2.509ex;" alt="{\displaystyle f_{1},f_{2},\ldots ,f_{n}}"></span>. The prototypical stochastic gradient descent algorithm is used for this discussion. As noted above, its recursion is given by <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{t}=w_{t-1}-\gamma _{t}\nabla V(\langle w_{t-1},x_{t}\rangle ,y_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>γ<!-- γ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mi mathvariant="normal">∇<!-- ∇ --></mi> <mi>V</mi> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{t}=w_{t-1}-\gamma _{t}\nabla V(\langle w_{t-1},x_{t}\rangle ,y_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/eccb0b75e94de94ffdec6c120cd8b3cd4140fa93" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:33.171ex; height:2.843ex;" alt="{\displaystyle w_{t}=w_{t-1}-\gamma _{t}\nabla V(\langle w_{t-1},x_{t}\rangle ,y_{t})}"></span> </p><p>The first interpretation consider the <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">stochastic gradient descent</a> method as applied to the problem of minimizing the expected risk <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I[w]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>I</mi> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I[w]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/42ebf51718836c1a9f3919691fc4f1d81d7d481d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.13ex; height:2.843ex;" alt="{\displaystyle I[w]}"></span> defined above.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> Indeed, in the case of an infinite stream of data, since the examples <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (x_{1},y_{1}),(x_{2},y_{2}),\ldots }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>,</mo> <mo>…<!-- … --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (x_{1},y_{1}),(x_{2},y_{2}),\ldots }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0ab216837403ea50bd6cddf544a29e4e8491bca0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:19.632ex; height:2.843ex;" alt="{\displaystyle (x_{1},y_{1}),(x_{2},y_{2}),\ldots }"></span> are assumed to be drawn i.i.d. from the distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle p(x,y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>p</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle p(x,y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/089e91a1824e14cebc8e8d04dc652c61b3008e0a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; margin-left: -0.089ex; width:6.587ex; height:2.843ex;" alt="{\displaystyle p(x,y)}"></span>, the sequence of gradients of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V(\cdot ,\cdot )}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo stretchy="false">(</mo> <mo>⋅<!-- ⋅ --></mo> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V(\cdot ,\cdot )}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e8a5bfec9da68c91698ea8828a02982e2ce01b7f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.924ex; height:2.843ex;" alt="{\displaystyle V(\cdot ,\cdot )}"></span> in the above iteration are an i.i.d. sample of stochastic estimates of the gradient of the expected risk <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I[w]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>I</mi> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I[w]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/42ebf51718836c1a9f3919691fc4f1d81d7d481d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.13ex; height:2.843ex;" alt="{\displaystyle I[w]}"></span> and therefore one can apply complexity results for the stochastic gradient descent method to bound the deviation <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I[w_{t}]-I[w^{\ast }]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>I</mi> <mo stretchy="false">[</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">]</mo> <mo>−<!-- − --></mo> <mi>I</mi> <mo stretchy="false">[</mo> <msup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msup> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I[w_{t}]-I[w^{\ast }]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6c0e3532c4a8fa3b6bdbf730941948ca8ffcb3f9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:12.98ex; height:2.843ex;" alt="{\displaystyle I[w_{t}]-I[w^{\ast }]}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w^{\ast }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w^{\ast }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88bf1b79b1bf570879555bfc5a296fe90617589c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.718ex; height:2.343ex;" alt="{\displaystyle w^{\ast }}"></span> is the minimizer of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I[w]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>I</mi> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I[w]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/42ebf51718836c1a9f3919691fc4f1d81d7d481d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.13ex; height:2.843ex;" alt="{\displaystyle I[w]}"></span>.<sup id="cite_ref-kushneryin_7-0" class="reference"><a href="#cite_note-kushneryin-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> This interpretation is also valid in the case of a finite training set; although with multiple passes through the data the gradients are no longer independent, still complexity results can be obtained in special cases. </p><p>The second interpretation applies to the case of a finite training set and considers the SGD algorithm as an instance of incremental gradient descent method.<sup id="cite_ref-bertsekas_3-1" class="reference"><a href="#cite_note-bertsekas-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> In this case, one instead looks at the empirical risk: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I_{n}[w]={\frac {1}{n}}\sum _{i=1}^{n}V(\langle w,x_{i}\rangle ,y_{i})\ .}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mi>V</mi> <mo stretchy="false">(</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>w</mi> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mtext> </mtext> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I_{n}[w]={\frac {1}{n}}\sum _{i=1}^{n}V(\langle w,x_{i}\rangle ,y_{i})\ .}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9b8f34491d029210c40d1ae50439e6b8940a89d7" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:29.091ex; height:6.843ex;" alt="{\displaystyle I_{n}[w]={\frac {1}{n}}\sum _{i=1}^{n}V(\langle w,x_{i}\rangle ,y_{i})\ .}"></span> Since the gradients of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V(\cdot ,\cdot )}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo stretchy="false">(</mo> <mo>⋅<!-- ⋅ --></mo> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V(\cdot ,\cdot )}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e8a5bfec9da68c91698ea8828a02982e2ce01b7f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.924ex; height:2.843ex;" alt="{\displaystyle V(\cdot ,\cdot )}"></span> in the incremental gradient descent iterations are also stochastic estimates of the gradient of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I_{n}[w]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I_{n}[w]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0d3517a76840cb00d16481a59a5e0eb5efaa54a9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.199ex; height:2.843ex;" alt="{\displaystyle I_{n}[w]}"></span>, this interpretation is also related to the stochastic gradient descent method, but applied to minimize the empirical risk as opposed to the expected risk. Since this interpretation concerns the empirical risk and not the expected risk, multiple passes through the data are readily allowed and actually lead to tighter bounds on the deviations <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I_{n}[w_{t}]-I_{n}[w_{n}^{\ast }]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">]</mo> <mo>−<!-- − --></mo> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I_{n}[w_{t}]-I_{n}[w_{n}^{\ast }]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0e2d73a59f3e75a8d8df94eae93e3d4ac3223ccf" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:15.284ex; height:2.843ex;" alt="{\displaystyle I_{n}[w_{t}]-I_{n}[w_{n}^{\ast }]}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w_{n}^{\ast }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w_{n}^{\ast }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e987128a9c50932c880e5c5b2fa168a91b044b5a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.883ex; height:2.509ex;" alt="{\displaystyle w_{n}^{\ast }}"></span> is the minimizer of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle I_{n}[w]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>I</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msub> <mo stretchy="false">[</mo> <mi>w</mi> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle I_{n}[w]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0d3517a76840cb00d16481a59a5e0eb5efaa54a9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.199ex; height:2.843ex;" alt="{\displaystyle I_{n}[w]}"></span>. </p> <div class="mw-heading mw-heading2"><h2 id="Implementations">Implementations</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=16" title="Edit section: Implementations"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Vowpal_Wabbit" title="Vowpal Wabbit">Vowpal Wabbit</a>: Open-source fast out-of-core online learning system which is notable for supporting a number of machine learning <a href="/wiki/Reduction_(complexity)" title="Reduction (complexity)">reductions</a>, importance weighting and a selection of different loss functions and optimisation algorithms. It uses the <a href="/wiki/Feature_hashing" title="Feature hashing">hashing trick</a> for bounding the size of the set of features independent of the amount of training data.</li> <li><a href="/wiki/Scikit-learn" title="Scikit-learn">scikit-learn</a>: Provides out-of-core implementations of algorithms for <ul><li>Classification: <a href="/wiki/Perceptron" title="Perceptron">Perceptron</a>, <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD classifier</a>, <a href="/wiki/Naive_Bayes_classifier" title="Naive Bayes classifier">Naive bayes classifier</a>.</li> <li>Regression: SGD Regressor, Passive Aggressive regressor.</li> <li>Clustering: <a href="/wiki/K-means_clustering" title="K-means clustering">Mini-batch k-means</a>.</li> <li>Feature extraction: <a href="/wiki/Dictionary_learning" class="mw-redirect" title="Dictionary learning">Mini-batch dictionary learning</a>, <a href="/wiki/Principal_component_analysis" title="Principal component analysis">Incremental PCA</a>.</li></ul></li></ul> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=17" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>Learning paradigms</b> </p> <ul><li><a href="/wiki/Incremental_learning" title="Incremental learning">Incremental learning</a></li> <li><a href="/wiki/Lazy_learning" title="Lazy learning">Lazy learning</a></li> <li><a href="/wiki/Offline_learning" title="Offline learning">Offline learning</a>, the opposite model</li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></li> <li><a href="/wiki/Multi-armed_bandit" title="Multi-armed bandit">Multi-armed bandit</a></li> <li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a></li></ul> <p><b>General algorithms</b> </p> <ul><li><a href="/wiki/Online_algorithm" title="Online algorithm">Online algorithm</a></li> <li><a href="/wiki/Online_optimization" title="Online optimization">Online optimization</a></li> <li><a href="/wiki/Streaming_algorithm" title="Streaming algorithm">Streaming algorithm</a></li> <li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">Stochastic gradient descent</a></li></ul> <p><b>Learning models</b> </p> <ul><li><a href="/wiki/Adaptive_Resonance_Theory" class="mw-redirect" title="Adaptive Resonance Theory">Adaptive Resonance Theory</a></li> <li><a href="/wiki/Hierarchical_temporal_memory" title="Hierarchical temporal memory">Hierarchical temporal memory</a></li> <li><a href="/wiki/K-nearest_neighbor_algorithm" class="mw-redirect" title="K-nearest neighbor algorithm">k-nearest neighbor algorithm</a></li> <li><a href="/wiki/Learning_vector_quantization" title="Learning vector quantization">Learning vector quantization</a></li> <li><a href="/wiki/Perceptron" title="Perceptron">Perceptron</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=18" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-references-wrap"><ol class="references"> <li id="cite_note-lorenzo-1"><span class="mw-cite-backlink">^ <a href="#cite_ref-lorenzo_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-lorenzo_1-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-lorenzo_1-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-lorenzo_1-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-lorenzo_1-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-lorenzo_1-5"><sup><i><b>f</b></i></sup></a> <a href="#cite_ref-lorenzo_1-6"><sup><i><b>g</b></i></sup></a></span> <span class="reference-text">L. Rosasco, T. Poggio, Machine Learning: a Regularization Approach, MIT-9.520 Lectures Notes, Manuscript, Dec. 2015. Chapter 7 - Online Learning</span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFKushnerYin2003" class="citation book cs1">Kushner, Harold J.; Yin, G. George (2003). <span class="id-lock-limited" title="Free access subject to limited trial, subscription normally required"><a rel="nofollow" class="external text" href="https://archive.org/details/stochasticapprox00yinh"><i>Stochastic Approximation and Recursive Algorithms with Applications</i></a></span> (Second ed.). New York: Springer. pp. <a rel="nofollow" class="external text" href="https://archive.org/details/stochasticapprox00yinh/page/n30">8</a>–12. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-0-387-21769-7" title="Special:BookSources/978-0-387-21769-7"><bdi>978-0-387-21769-7</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Stochastic+Approximation+and+Recursive+Algorithms+with+Applications&rft.place=New+York&rft.pages=8-12&rft.edition=Second&rft.pub=Springer&rft.date=2003&rft.isbn=978-0-387-21769-7&rft.aulast=Kushner&rft.aufirst=Harold+J.&rft.au=Yin%2C+G.+George&rft_id=https%3A%2F%2Farchive.org%2Fdetails%2Fstochasticapprox00yinh&rfr_id=info%3Asid%2Fen.wikipedia.org%3AOnline+machine+learning" class="Z3988"></span></span> </li> <li id="cite_note-bertsekas-3"><span class="mw-cite-backlink">^ <a href="#cite_ref-bertsekas_3-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-bertsekas_3-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">Bertsekas, D. P. (2011). Incremental gradient, subgradient, and proximal methods for convex optimization: a survey. Optimization for Machine Learning, 85.</span> </li> <li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHazan2015" class="citation book cs1"><a href="/wiki/Elad_Hazan" title="Elad Hazan">Hazan, Elad</a> (2015). <a rel="nofollow" class="external text" href="http://ocobook.cs.princeton.edu/OCObook.pdf"><i>Introduction to Online Convex Optimization</i></a> <span class="cs1-format">(PDF)</span>. Foundations and Trends in Optimization.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Introduction+to+Online+Convex+Optimization&rft.pub=Foundations+and+Trends+in+Optimization&rft.date=2015&rft.aulast=Hazan&rft.aufirst=Elad&rft_id=http%3A%2F%2Focobook.cs.princeton.edu%2FOCObook.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AOnline+machine+learning" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFParisiKemkerPartKanan2019" class="citation journal cs1">Parisi, German I.; Kemker, Ronald; Part, Jose L.; Kanan, Christopher; Wermter, Stefan (2019). <a rel="nofollow" class="external text" href="https://dx.doi.org/10.1016/j.neunet.2019.01.012">"Continual lifelong learning with neural networks: A review"</a>. <i>Neural Networks</i>. <b>113</b>: 54–71. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1802.07569">1802.07569</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.neunet.2019.01.012">10.1016/j.neunet.2019.01.012</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a> <a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0893-6080">0893-6080</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Neural+Networks&rft.atitle=Continual+lifelong+learning+with+neural+networks%3A+A+review&rft.volume=113&rft.pages=54-71&rft.date=2019&rft_id=info%3Aarxiv%2F1802.07569&rft.issn=0893-6080&rft_id=info%3Adoi%2F10.1016%2Fj.neunet.2019.01.012&rft.aulast=Parisi&rft.aufirst=German+I.&rft.au=Kemker%2C+Ronald&rft.au=Part%2C+Jose+L.&rft.au=Kanan%2C+Christopher&rft.au=Wermter%2C+Stefan&rft_id=http%3A%2F%2Fdx.doi.org%2F10.1016%2Fj.neunet.2019.01.012&rfr_id=info%3Asid%2Fen.wikipedia.org%3AOnline+machine+learning" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFBottou1998" class="citation book cs1"><a href="/wiki/L%C3%A9on_Bottou" title="Léon Bottou">Bottou, Léon</a> (1998). "Online Algorithms and Stochastic Approximations". <span class="id-lock-registration" title="Free registration required"><a rel="nofollow" class="external text" href="https://archive.org/details/onlinelearningin0000unse"><i>Online Learning and Neural Networks</i></a></span>. Cambridge University Press. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-0-521-65263-6" title="Special:BookSources/978-0-521-65263-6"><bdi>978-0-521-65263-6</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=bookitem&rft.atitle=Online+Algorithms+and+Stochastic+Approximations&rft.btitle=Online+Learning+and+Neural+Networks&rft.pub=Cambridge+University+Press&rft.date=1998&rft.isbn=978-0-521-65263-6&rft.aulast=Bottou&rft.aufirst=L%C3%A9on&rft_id=https%3A%2F%2Farchive.org%2Fdetails%2Fonlinelearningin0000unse&rfr_id=info%3Asid%2Fen.wikipedia.org%3AOnline+machine+learning" class="Z3988"></span></span> </li> <li id="cite_note-kushneryin-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-kushneryin_7-0">^</a></b></span> <span class="reference-text"><i>Stochastic Approximation Algorithms and Applications</i>, Harold J. Kushner and G. George Yin, New York: Springer-Verlag, 1997. <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/0-387-94916-X" title="Special:BookSources/0-387-94916-X">0-387-94916-X</a>; 2nd ed., titled <i>Stochastic Approximation and Recursive Algorithms and Applications</i>, 2003, <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/0-387-00894-2" title="Special:BookSources/0-387-00894-2">0-387-00894-2</a>.</span> </li> </ol></div> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Online_machine_learning&action=edit&section=19" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a rel="nofollow" class="external text" href="https://www.mit.edu/~rakhlin/6.883/">6.883: Online Methods in Machine Learning: Theory and Applications. Alexander Rakhlin. MIT</a></li></ul> <!-- NewPP limit report Parsed by mw‐web.eqiad.main‐5dc468848‐2dlmj Cached time: 20241122144538 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.496 seconds Real time usage: 0.930 seconds Preprocessor visited node count: 2342/1000000 Post‐expand include size: 47209/2097152 bytes Template argument size: 2854/2097152 bytes Highest expansion depth: 15/100 Expensive parser function count: 7/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 40902/5000000 bytes Lua time usage: 0.228/10.000 seconds Lua memory usage: 4639438/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 409.219 1 -total 29.53% 120.837 1 Template:Machine_learning 27.60% 112.940 1 Template:Sidebar_with_collapsible_lists 21.88% 89.536 3 Template:Cite_book 15.60% 63.830 1 Template:Short_description 9.63% 39.411 2 Template:Pagetype 8.74% 35.769 1 Template:Confuse 8.69% 35.541 1 Template:Citation_needed 7.22% 29.537 1 Template:Fix 5.11% 20.926 2 Template:ISBN --> <!-- Saved in parser cache with key enwiki:pcache:idhash:19892153-0!canonical and timestamp 20241122144538 and revision id 1243080168. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Online_machine_learning&oldid=1243080168">https://en.wikipedia.org/w/index.php?title=Online_machine_learning&oldid=1243080168</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Online_algorithms" title="Category:Online algorithms">Online algorithms</a></li><li><a href="/wiki/Category:Machine_learning_algorithms" title="Category:Machine learning algorithms">Machine learning algorithms</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_matches_Wikidata" title="Category:Short description matches Wikidata">Short description matches Wikidata</a></li><li><a href="/wiki/Category:All_articles_with_unsourced_statements" title="Category:All articles with unsourced statements">All articles with unsourced statements</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_September_2019" title="Category:Articles with unsourced statements from September 2019">Articles with unsourced statements from September 2019</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 30 August 2024, at 11:23<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Online_machine_learning&mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-f69cdc8f6-4d6jn","wgBackendResponseTime":154,"wgPageParseReport":{"limitreport":{"cputime":"0.496","walltime":"0.930","ppvisitednodes":{"value":2342,"limit":1000000},"postexpandincludesize":{"value":47209,"limit":2097152},"templateargumentsize":{"value":2854,"limit":2097152},"expansiondepth":{"value":15,"limit":100},"expensivefunctioncount":{"value":7,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":40902,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 409.219 1 -total"," 29.53% 120.837 1 Template:Machine_learning"," 27.60% 112.940 1 Template:Sidebar_with_collapsible_lists"," 21.88% 89.536 3 Template:Cite_book"," 15.60% 63.830 1 Template:Short_description"," 9.63% 39.411 2 Template:Pagetype"," 8.74% 35.769 1 Template:Confuse"," 8.69% 35.541 1 Template:Citation_needed"," 7.22% 29.537 1 Template:Fix"," 5.11% 20.926 2 Template:ISBN"]},"scribunto":{"limitreport-timeusage":{"value":"0.228","limit":"10.000"},"limitreport-memusage":{"value":4639438,"limit":52428800}},"cachereport":{"origin":"mw-web.eqiad.main-5dc468848-2dlmj","timestamp":"20241122144538","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Online machine learning","url":"https:\/\/en.wikipedia.org\/wiki\/Online_machine_learning","sameAs":"http:\/\/www.wikidata.org\/entity\/Q7094097","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q7094097","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2008-10-22T19:43:10Z","dateModified":"2024-08-30T11:23:43Z","headline":"method of machine learning"}</script> </body> </html>