<!-- CINXE.COM
Regularized least squares - Wikipedia -->
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<title>Regularized least squares - Wikipedia</title>
<!-- Bootstrap script. First it replaces the root element's class list with the "client-js" variant, substituting any "-clientpref-" token for which the enwikimwclientpreferences cookie holds a value. Then it assigns the RLCONF and RLSTATE globals. Every quoted string must stay on one physical line: a raw line break inside a JavaScript string literal is a syntax error that stops the entire script from running, so line breaks are only placed between statements or after a comma. -->
<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());
RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"925592ca-f3fd-458e-86fd-e9f848f83a25","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Regularized_least_squares","wgTitle":"Regularized least squares","wgCurRevisionId":1259548425,"wgRevisionId":1259548425,"wgArticleId":48803892,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Least squares","Linear algebra","Inverse problems"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Regularized_least_squares","wgRelevantArticleId":48803892,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}}, "wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":20000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q25304486","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};
RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready",
"user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.math.styles":"ready","ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.tablesorter.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","ext.scribunto.logs","site","mediawiki.page.ready","jquery.tablesorter","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming", "ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.quicksurveys.init","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.math.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.tablesorter.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link 
rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022">
<!-- Page metadata and resource hints. Cross-origin URLs are written with an explicit https: scheme, matching the canonical and license links, so they resolve the same way whatever scheme this document is loaded from. -->
<meta name="generator" content="MediaWiki 1.44.0-wmf.4">
<meta name="referrer" content="origin">
<meta name="referrer" content="origin-when-cross-origin">
<meta name="robots" content="max-image-preview:standard">
<meta name="format-detection" content="telephone=no">
<meta name="viewport" content="width=1120">
<meta property="og:title" content="Regularized least squares - Wikipedia">
<meta property="og:type" content="website">
<link rel="preconnect" href="https://upload.wikimedia.org">
<link rel="alternate" media="only screen and (max-width: 640px)" href="https://en.m.wikipedia.org/wiki/Regularized_least_squares">
<link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Regularized_least_squares&action=edit">
<link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png">
<link rel="icon" href="/static/favicon/wikipedia.ico">
<link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)">
<link rel="EditURI" type="application/rsd+xml" href="https://en.wikipedia.org/w/api.php?action=rsd">
<link rel="canonical" href="https://en.wikipedia.org/wiki/Regularized_least_squares">
<link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en">
<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom">
<link rel="dns-prefetch" href="https://meta.wikimedia.org">
<link rel="dns-prefetch" href="https://login.wikimedia.org">
</head>
<body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Regularized_least_squares rootpage-Regularized_least_squares skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a>
<div class="vector-header-container"> <header class="vector-header mw-header"> <div
class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a 
href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a 
href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links 
vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" 
href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Regularized+least+squares" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Regularized+least+squares" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a 
href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Regularized+least+squares" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Regularized+least+squares" title="You're encouraged to log in; however, it's not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" 
aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-General_formulation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#General_formulation"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>General formulation</span> </div> </a> <ul id="toc-General_formulation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Kernel_formulation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Kernel_formulation"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Kernel formulation</span> </div> </a> <button aria-controls="toc-Kernel_formulation-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span 
class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Kernel formulation subsection</span> </button> <ul id="toc-Kernel_formulation-sublist" class="vector-toc-list"> <li id="toc-Definition_of_RKHS" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Definition_of_RKHS"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Definition of RKHS</span> </div> </a> <ul id="toc-Definition_of_RKHS-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Arbitrary_kernel" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Arbitrary_kernel"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>Arbitrary kernel</span> </div> </a> <ul id="toc-Arbitrary_kernel-sublist" class="vector-toc-list"> <li id="toc-Complexity" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Complexity"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2.1</span> <span>Complexity</span> </div> </a> <ul id="toc-Complexity-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Prediction" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Prediction"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Prediction</span> </div> </a> <ul id="toc-Prediction-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Linear_kernel" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Linear_kernel"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.4</span> <span>Linear kernel</span> </div> </a> <ul id="toc-Linear_kernel-sublist" class="vector-toc-list"> <li id="toc-Complexity_2" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Complexity_2"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.4.1</span> <span>Complexity</span> </div> </a> <ul id="toc-Complexity_2-sublist" 
class="vector-toc-list"> </ul> </li> </ul> </li> </ul> </li> <li id="toc-Feature_maps_and_Mercer's_theorem" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Feature_maps_and_Mercer's_theorem"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Feature maps and Mercer's theorem</span> </div> </a> <ul id="toc-Feature_maps_and_Mercer's_theorem-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Bayesian_interpretation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Bayesian_interpretation"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Bayesian interpretation</span> </div> </a> <ul id="toc-Bayesian_interpretation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Specific_examples" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Specific_examples"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Specific examples</span> </div> </a> <button aria-controls="toc-Specific_examples-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Specific examples subsection</span> </button> <ul id="toc-Specific_examples-sublist" class="vector-toc-list"> <li id="toc-Ridge_regression_(or_Tikhonov_regularization)" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Ridge_regression_(or_Tikhonov_regularization)"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.1</span> <span>Ridge regression (or Tikhonov regularization)</span> </div> </a> <ul id="toc-Ridge_regression_(or_Tikhonov_regularization)-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Lasso_regression" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" 
href="#Lasso_regression"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.2</span> <span>Lasso regression</span> </div> </a> <ul id="toc-Lasso_regression-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-ℓ0_Penalization" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#ℓ0_Penalization"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.3</span> <span><i>ℓ</i><sub>0</sub> Penalization</span> </div> </a> <ul id="toc-ℓ0_Penalization-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Elastic_net" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Elastic_net"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.4</span> <span>Elastic net</span> </div> </a> <ul id="toc-Elastic_net-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Partial_list_of_RLS_methods" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Partial_list_of_RLS_methods"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>Partial list of RLS methods</span> </div> </a> <ul id="toc-Partial_list_of_RLS_methods-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> 
<a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div>
<div class="mw-content-container"> <main id="content" class="mw-body">
<!-- Page title bar: a collapsed table-of-contents dropdown, the page heading, and the language selector dropdown. -->
<header class="mw-body-header vector-page-titlebar">
<nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav>
<h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Regularized least squares</span></h1>
<div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="This article exists only in this language. 
Add the article for other languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-0" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">Add languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> <div class="after-portlet after-portlet-lang"><span class="uls-after-portlet-link"></span><span class="wb-langlinks-add wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q25304486#sitelinks-wikipedia" title="Add interlanguage links" class="wbc-editpage">Add links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Regularized_least_squares" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Regularized_least_squares" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label 
id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Regularized_least_squares"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div 
class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Regularized_least_squares"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a 
href="/wiki/Special:WhatLinksHere/Regularized_least_squares" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Regularized_least_squares" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&oldid=1259548425" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Regularized_least_squares&id=1259548425&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FRegularized_least_squares"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FRegularized_least_squares"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> 
<ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Regularized_least_squares&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Regularized_least_squares&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q25304486" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><style data-mw-deduplicate="TemplateStyles:r1251242444">.mw-parser-output .ambox{border:1px solid #a2a9b1;border-left:10px solid #36c;background-color:#fbfbfb;box-sizing:border-box}.mw-parser-output .ambox+link+.ambox,.mw-parser-output .ambox+link+style+.ambox,.mw-parser-output .ambox+link+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+style+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+link+.ambox{margin-top:-1px}html body.mediawiki .mw-parser-output .ambox.mbox-small-left{margin:4px 1em 4px 0;overflow:hidden;width:238px;border-collapse:collapse;font-size:88%;line-height:1.25em}.mw-parser-output .ambox-speedy{border-left:10px solid #b32424;background-color:#fee7e6}.mw-parser-output .ambox-delete{border-left:10px solid #b32424}.mw-parser-output .ambox-content{border-left:10px solid #f28500}.mw-parser-output .ambox-style{border-left:10px solid #fc3}.mw-parser-output .ambox-move{border-left:10px solid #9932cc}.mw-parser-output .ambox-protection{border-left:10px solid #a2a9b1}.mw-parser-output .ambox .mbox-text{border:none;padding:0.25em 0.5em;width:100%}.mw-parser-output .ambox .mbox-image{border:none;padding:2px 0 2px 0.5em;text-align:center}.mw-parser-output .ambox 
.mbox-imageright{border:none;padding:2px 0.5em 2px 0;text-align:center}.mw-parser-output .ambox .mbox-empty-cell{border:none;padding:0;width:1px}.mw-parser-output .ambox .mbox-image-div{width:52px}@media(min-width:720px){.mw-parser-output .ambox{margin:0 10%}}@media print{body.ns-0 .mw-parser-output .ambox{display:none!important}}</style><table class="box-Summarize plainlinks metadata ambox ambox-style" role="presentation"><tbody><tr><td class="mbox-image"><div class="mbox-image-div"><span typeof="mw:File"><span><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/40px-Edit-clear.svg.png" decoding="async" width="40" height="40" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/60px-Edit-clear.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/80px-Edit-clear.svg.png 2x" data-file-width="48" data-file-height="48" /></span></span></div></td><td class="mbox-text"><div class="mbox-text-span">This article should be summarized in <a href="/wiki/Least_squares#Regularization" title="Least squares">Least squares#Regularization</a> and a link provided from there to here using the <code>{{<a href="/wiki/Template:Main" title="Template:Main">Main</a>}}</code> template.<span class="hide-when-compact"> See guidance in <a href="/wiki/Wikipedia:Summary_style" title="Wikipedia:Summary style">Wikipedia:Summary style</a>.</span> <span class="date-container"><i>(<span class="date">November 2020</span>)</i></span></div></td></tr></tbody></table> <style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output 
.hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style 
data-mw-deduplicate="TemplateStyles:r1246091330">.mw-parser-output .sidebar{width:22em;float:right;clear:right;margin:0.5em 0 1em 1em;background:var(--background-color-neutral-subtle,#f8f9fa);border:1px solid var(--border-color-base,#a2a9b1);padding:0.2em;text-align:center;line-height:1.4em;font-size:88%;border-collapse:collapse;display:table}body.skin-minerva .mw-parser-output .sidebar{display:table!important;float:right!important;margin:0.5em 0 1em 1em!important}.mw-parser-output .sidebar-subgroup{width:100%;margin:0;border-spacing:0}.mw-parser-output .sidebar-left{float:left;clear:left;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-none{float:none;clear:both;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-outer-title{padding:0 0.4em 0.2em;font-size:125%;line-height:1.2em;font-weight:bold}.mw-parser-output .sidebar-top-image{padding:0.4em}.mw-parser-output .sidebar-top-caption,.mw-parser-output .sidebar-pretitle-with-top-image,.mw-parser-output .sidebar-caption{padding:0.2em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-pretitle{padding:0.4em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-title,.mw-parser-output .sidebar-title-with-pretitle{padding:0.2em 0.8em;font-size:145%;line-height:1.2em}.mw-parser-output .sidebar-title-with-pretitle{padding:0.1em 0.4em}.mw-parser-output .sidebar-image{padding:0.2em 0.4em 0.4em}.mw-parser-output .sidebar-heading{padding:0.1em 0.4em}.mw-parser-output .sidebar-content{padding:0 0.5em 0.4em}.mw-parser-output .sidebar-content-with-subgroup{padding:0.1em 0.4em 0.2em}.mw-parser-output .sidebar-above,.mw-parser-output .sidebar-below{padding:0.3em 0.8em;font-weight:bold}.mw-parser-output .sidebar-collapse .sidebar-above,.mw-parser-output .sidebar-collapse .sidebar-below{border-top:1px solid #aaa;border-bottom:1px solid #aaa}.mw-parser-output .sidebar-navbar{text-align:right;font-size:115%;padding:0 0.4em 0.4em}.mw-parser-output .sidebar-list-title{padding:0 
0.4em;text-align:left;font-weight:bold;line-height:1.6em;font-size:105%}.mw-parser-output .sidebar-list-title-c{padding:0 0.4em;text-align:center;margin:0 3.3em}@media(max-width:640px){body.mediawiki .mw-parser-output .sidebar{width:100%!important;clear:both;float:none!important;margin-left:0!important;margin-right:0!important}}body.skin--responsive .mw-parser-output .sidebar a>img{max-width:none!important}@media screen{html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media print{body.ns-0 .mw-parser-output .sidebar{display:none!important}}</style><table class="sidebar nomobile nowraplinks hlist"><tbody><tr><td class="sidebar-pretitle">Part of a series on</td></tr><tr><th class="sidebar-title-with-pretitle"><a href="/wiki/Regression_analysis" title="Regression analysis">Regression analysis</a></th></tr><tr><th class="sidebar-heading"> Models</th></tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Linear_regression" title="Linear regression">Linear regression</a></li> <li><a href="/wiki/Simple_linear_regression" title="Simple linear regression">Simple regression</a></li> <li><a href="/wiki/Polynomial_regression" title="Polynomial regression">Polynomial regression</a></li> <li><a href="/wiki/General_linear_model" title="General 
linear model">General linear model</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Generalized_linear_model" title="Generalized linear model">Generalized linear model</a></li> <li><a href="/wiki/Vector_generalized_linear_model" title="Vector generalized linear model">Vector generalized linear model</a></li> <li><a href="/wiki/Discrete_choice" title="Discrete choice">Discrete choice</a></li> <li><a href="/wiki/Binomial_regression" title="Binomial regression">Binomial regression</a></li> <li><a href="/wiki/Binary_regression" title="Binary regression">Binary regression</a></li> <li><a href="/wiki/Logistic_regression" title="Logistic regression">Logistic regression</a></li> <li><a href="/wiki/Multinomial_logistic_regression" title="Multinomial logistic regression">Multinomial logistic regression</a></li> <li><a href="/wiki/Mixed_logit" title="Mixed logit">Mixed logit</a></li> <li><a href="/wiki/Probit_model" title="Probit model">Probit</a></li> <li><a href="/wiki/Multinomial_probit" title="Multinomial probit">Multinomial probit</a></li> <li><a href="/wiki/Ordered_logit" title="Ordered logit">Ordered logit</a></li> <li><a href="/wiki/Ordered_probit" class="mw-redirect" title="Ordered probit">Ordered probit</a></li> <li><a href="/wiki/Poisson_regression" title="Poisson regression">Poisson</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Multilevel_model" title="Multilevel model">Multilevel model</a></li> <li><a href="/wiki/Fixed_effects_model" title="Fixed effects model">Fixed effects</a></li> <li><a href="/wiki/Random_effects_model" title="Random effects model">Random effects</a></li> <li><a href="/wiki/Mixed_model" title="Mixed model">Linear mixed-effects model</a></li> <li><a href="/wiki/Nonlinear_mixed-effects_model" title="Nonlinear mixed-effects model">Nonlinear mixed-effects model</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Nonlinear_regression" title="Nonlinear 
regression">Nonlinear regression</a></li> <li><a href="/wiki/Nonparametric_regression" title="Nonparametric regression">Nonparametric</a></li> <li><a href="/wiki/Semiparametric_regression" title="Semiparametric regression">Semiparametric</a></li> <li><a href="/wiki/Robust_regression" title="Robust regression">Robust</a></li> <li><a href="/wiki/Quantile_regression" title="Quantile regression">Quantile</a></li> <li><a href="/wiki/Isotonic_regression" title="Isotonic regression">Isotonic</a></li> <li><a href="/wiki/Principal_component_regression" title="Principal component regression">Principal components</a></li> <li><a href="/wiki/Least-angle_regression" title="Least-angle regression">Least angle</a></li> <li><a href="/wiki/Local_regression" title="Local regression">Local</a></li> <li><a href="/wiki/Segmented_regression" title="Segmented regression">Segmented</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Errors-in-variables_models" title="Errors-in-variables models">Errors-in-variables</a></li></ul></td> </tr><tr><th class="sidebar-heading"> Estimation</th></tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Least_squares" title="Least squares">Least squares</a></li> <li><a href="/wiki/Linear_least_squares" title="Linear least squares">Linear</a></li> <li><a href="/wiki/Non-linear_least_squares" title="Non-linear least squares">Non-linear</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Ordinary_least_squares" title="Ordinary least squares">Ordinary</a></li> <li><a href="/wiki/Weighted_least_squares" title="Weighted least squares">Weighted</a></li> <li><a href="/wiki/Generalized_least_squares" title="Generalized least squares">Generalized</a></li> <li><a href="/wiki/Generalized_estimating_equation" title="Generalized estimating equation">Generalized estimating equation</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Partial_least_squares_regression" 
title="Partial least squares regression">Partial</a></li> <li><a href="/wiki/Total_least_squares" title="Total least squares">Total</a></li> <li><a href="/wiki/Non-negative_least_squares" title="Non-negative least squares">Non-negative</a></li> <li><a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Ridge regression</a></li> <li><a class="mw-selflink selflink">Regularized</a></li></ul></td> </tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Least_absolute_deviations" title="Least absolute deviations">Least absolute deviations</a></li> <li><a href="/wiki/Iteratively_reweighted_least_squares" title="Iteratively reweighted least squares">Iteratively reweighted</a></li> <li><a href="/wiki/Bayesian_linear_regression" title="Bayesian linear regression">Bayesian</a></li> <li><a href="/wiki/Bayesian_multivariate_linear_regression" title="Bayesian multivariate linear regression">Bayesian multivariate</a></li> <li><a href="/wiki/Least-squares_spectral_analysis" title="Least-squares spectral analysis">Least-squares spectral analysis</a></li></ul></td> </tr><tr><th class="sidebar-heading"> Background</th></tr><tr><td class="sidebar-content"> <ul><li><a href="/wiki/Regression_validation" title="Regression validation">Regression validation</a></li> <li><a href="/wiki/Mean_and_predicted_response" class="mw-redirect" title="Mean and predicted response">Mean and predicted response</a></li> <li><a href="/wiki/Errors_and_residuals" title="Errors and residuals">Errors and residuals</a></li> <li><a href="/wiki/Goodness_of_fit" title="Goodness of fit">Goodness of fit</a></li> <li><a href="/wiki/Studentized_residual" title="Studentized residual">Studentized residual</a></li> <li><a href="/wiki/Gauss%E2%80%93Markov_theorem" title="Gauss–Markov theorem">Gauss–Markov theorem</a></li></ul></td> </tr><tr><td class="sidebar-below"> <ul><li><span class="nowrap"><span class="noviewer" typeof="mw:File"><a 
href="/wiki/File:Nuvola_apps_edu_mathematics_blue-p.svg" class="mw-file-description"><img alt="icon" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/3e/Nuvola_apps_edu_mathematics_blue-p.svg/28px-Nuvola_apps_edu_mathematics_blue-p.svg.png" decoding="async" width="28" height="28" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/3e/Nuvola_apps_edu_mathematics_blue-p.svg/42px-Nuvola_apps_edu_mathematics_blue-p.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/3e/Nuvola_apps_edu_mathematics_blue-p.svg/56px-Nuvola_apps_edu_mathematics_blue-p.svg.png 2x" data-file-width="128" data-file-height="128" /></a></span> </span><a href="/wiki/Portal:Mathematics" title="Portal:Mathematics">Mathematics portal</a></li></ul></td></tr><tr><td class="sidebar-navbar"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media 
print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Regression_bar" title="Template:Regression bar"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Regression_bar" title="Template talk:Regression bar"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Regression_bar" title="Special:EditPage/Template:Regression bar"><abbr title="Edit this template">e</abbr></a></li></ul></div></td></tr></tbody></table> <p><b>Regularized least squares</b> (<b>RLS</b>) is a family of methods for solving the <a href="/wiki/Least_squares" title="Least squares">least-squares</a> problem while using <a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">regularization</a> to further constrain the resulting solution. </p><p>RLS is used for two main reasons. The first comes up when the number of variables in the linear system exceeds the number of observations. In such settings, the <a href="/wiki/Ordinary_least_squares" title="Ordinary least squares">ordinary least-squares</a> problem is <a href="/wiki/Ill-posed_problem" class="mw-redirect" title="Ill-posed problem">ill-posed</a> and is therefore impossible to fit because the associated optimization problem has infinitely many solutions. RLS allows the introduction of further constraints that uniquely determine the solution. </p><p>The second reason for using RLS arises when the learned model suffers from poor <a href="/wiki/Generalization_error" title="Generalization error">generalization</a>. RLS can be used in such cases to improve the generalizability of the model by constraining it at training time. This constraint can either force the solution to be "sparse" in some way or to reflect other prior knowledge about the problem such as information about correlations between features. 
A <a href="/wiki/Bayesian_inference" title="Bayesian inference">Bayesian</a> understanding of this can be reached by showing that RLS methods are often equivalent to <a href="/wiki/Prior_probability" title="Prior probability">priors</a> on the solution to the least-squares problem. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="General_formulation">General formulation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=1" title="Edit section: General formulation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Consider a learning setting given by a probabilistic space <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (X\times Y,\rho (X,Y))}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <mi>X</mi> <mo>×<!-- × --></mo> <mi>Y</mi> <mo>,</mo> <mi>ρ<!-- ρ --></mi> <mo stretchy="false">(</mo> <mi>X</mi> <mo>,</mo> <mi>Y</mi> <mo stretchy="false">)</mo> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (X\times Y,\rho (X,Y))}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4f446c57747588674bb48d9cf80e259638f63dad" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.235ex; height:2.843ex;" alt="{\displaystyle (X\times Y,\rho (X,Y))}"></span>, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle Y\in R}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> 
<mi>Y</mi> <mo>∈<!-- ∈ --></mo> <mi>R</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle Y\in R}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/710929a07d2f0724d6ffef386108e39caa0e1334" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.378ex; height:2.176ex;" alt="{\displaystyle Y\in R}"></span>. Let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle S=\{x_{i},y_{i}\}_{i=1}^{n}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>S</mi> <mo>=</mo> <mo fence="false" stretchy="false">{</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msubsup> <mo fence="false" stretchy="false">}</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle S=\{x_{i},y_{i}\}_{i=1}^{n}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fd5cac7259f70621f459b3fa8f4d03c85b9abbc1" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:14.925ex; height:3.009ex;" alt="{\displaystyle S=\{x_{i},y_{i}\}_{i=1}^{n}}"></span> denote a training set of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> pairs drawn i.i.d. from the joint distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \rho }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>ρ<!-- ρ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \rho }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1f7d439671d1289b6a816e6af7a304be40608d64" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:1.202ex; height:2.176ex;" alt="{\displaystyle \rho }"></span>. 
Let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V:Y\times R\to [0;\infty )}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> <mo>:</mo> <mi>Y</mi> <mo>×<!-- × --></mo> <mi>R</mi> <mo stretchy="false">→<!-- → --></mo> <mo stretchy="false">[</mo> <mn>0</mn> <mo>;</mo> <mi mathvariant="normal">∞<!-- ∞ --></mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V:Y\times R\to [0;\infty )}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8189bf4f403155d018906aa2b20598ce1660f71c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:19.788ex; height:2.843ex;" alt="{\displaystyle V:Y\times R\to [0;\infty )}"></span> be a loss function. 
Define <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span> as the space of functions such that the expected risk <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varepsilon (f)=\int V(y,f(x))\,d\rho (x,y)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>ε<!-- ε --></mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo>∫<!-- ∫ --></mo> <mi>V</mi> <mo stretchy="false">(</mo> <mi>y</mi> <mo>,</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo stretchy="false">)</mo> <mspace width="thinmathspace" /> <mi>d</mi> <mi>ρ<!-- ρ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>y</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varepsilon (f)=\int V(y,f(x))\,d\rho (x,y)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/554b107f71ff51c2592ffee3ede007ec896ee4c0" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:28.187ex; height:5.676ex;" alt="{\displaystyle \varepsilon (f)=\int 
V(y,f(x))\,d\rho (x,y)}"></span> is well defined. The main goal is to minimize the expected risk: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \inf _{f\in F}\varepsilon (f)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">inf</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> <mo>∈<!-- ∈ --></mo> <mi>F</mi> </mrow> </munder> <mi>ε<!-- ε --></mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \inf _{f\in F}\varepsilon (f)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/96b2d0866d5a55cfec09323b83e885e0b31c4157" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:7.79ex; height:4.343ex;" alt="{\displaystyle \inf _{f\in F}\varepsilon (f)}"></span> Since the problem cannot be solved exactly, there is a need to specify how to measure the quality of a solution. A good learning algorithm should provide an estimator with a small risk. 
</p><p>As the joint distribution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \rho }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>ρ<!-- ρ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \rho }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1f7d439671d1289b6a816e6af7a304be40608d64" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:1.202ex; height:2.176ex;" alt="{\displaystyle \rho }"></span> is typically unknown, the empirical risk is taken. For regularized least squares the square loss function is introduced: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varepsilon (f)={\frac {1}{n}}\sum _{i=1}^{n}V(y_{i},f(x_{i}))={\frac {1}{n}}\sum _{i=1}^{n}(y_{i}-f(x_{i}))^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>ε<!-- ε --></mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mi>V</mi> <mo stretchy="false">(</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo stretchy="false">)</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> 
</mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mo stretchy="false">(</mo> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>−<!-- − --></mo> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varepsilon (f)={\frac {1}{n}}\sum _{i=1}^{n}V(y_{i},f(x_{i}))={\frac {1}{n}}\sum _{i=1}^{n}(y_{i}-f(x_{i}))^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cb1ef6bda3c930a106372037c76a405bd9d44a17" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:47.348ex; height:6.843ex;" alt="{\displaystyle \varepsilon (f)={\frac {1}{n}}\sum _{i=1}^{n}V(y_{i},f(x_{i}))={\frac {1}{n}}\sum _{i=1}^{n}(y_{i}-f(x_{i}))^{2}}"></span> </p><p>However, if the functions are from a relatively unconstrained space, such as the set of square-integrable functions on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/68baa052181f707c662844a465bfeeb135e82bab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.98ex; height:2.176ex;" alt="{\displaystyle X}"></span>, this approach may 
overfit the training data, and lead to poor generalization. Thus, the learning algorithm should somehow constrain or penalize the complexity of the function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}"></span>. In RLS, this is accomplished by choosing functions from a reproducing kernel Hilbert space (RKHS) <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\mathcal {H}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\mathcal {H}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/19ef4c7b923a5125ac91aa491838a95ee15b804f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.964ex; height:2.176ex;" alt="{\displaystyle {\mathcal {H}}}"></span>, and adding a regularization term to the objective function, proportional to the norm of the function in <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: 
none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\mathcal {H}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\mathcal {H}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/19ef4c7b923a5125ac91aa491838a95ee15b804f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.964ex; height:2.176ex;" alt="{\displaystyle {\mathcal {H}}}"></span>: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \inf _{f\in F}\varepsilon (f)+\lambda R(f),\lambda >0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">inf</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> <mo>∈<!-- ∈ --></mo> <mi>F</mi> </mrow> </munder> <mi>ε<!-- ε --></mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>R</mi> <mo stretchy="false">(</mo> <mi>f</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>λ<!-- λ --></mi> <mo>></mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \inf _{f\in F}\varepsilon (f)+\lambda R(f),\lambda >0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/28bafb75020450ca0daf96182c5ffb05816d15cd" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:23.487ex; height:4.343ex;" alt="{\displaystyle \inf _{f\in F}\varepsilon (f)+\lambda R(f),\lambda 
>0}"></span> </p> <div class="mw-heading mw-heading2"><h2 id="Kernel_formulation">Kernel formulation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=2" title="Edit section: Kernel formulation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading3"><h3 id="Definition_of_RKHS">Definition of RKHS</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=3" title="Edit section: Definition of RKHS"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>An RKHS can be defined by a <a href="/wiki/Symmetric_function" title="Symmetric function">symmetric</a> <a href="/wiki/Positive-definite_kernel_function" class="mw-redirect" title="Positive-definite kernel function">positive-definite kernel function</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/772fba0af3be5acaa89fe573587127aa68c73674" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.327ex; height:2.843ex;" alt="{\displaystyle K(x,z)}"></span> with the reproducing property: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" 
alttext="{\displaystyle \langle K_{x},f\rangle _{\mathcal {H}}=f(x),}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>x</mi> </mrow> </msub> <mo>,</mo> <mi>f</mi> <msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </msub> <mo>=</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \langle K_{x},f\rangle _{\mathcal {H}}=f(x),}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9e93622abfff58c86e6fe5bbc0c5add425788bc7" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.051ex; height:2.843ex;" alt="{\displaystyle \langle K_{x},f\rangle _{\mathcal {H}}=f(x),}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K_{x}(z)=K(x,z)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>x</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K_{x}(z)=K(x,z)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4a9dd75a0008646992ae498fc64434413146a202" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: 
-0.838ex; width:16.468ex; height:2.843ex;" alt="{\displaystyle K_{x}(z)=K(x,z)}"></span>. The RKHS for a kernel <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2b76fce82a62ed5461908f0dc8f037de4e3686b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.066ex; height:2.176ex;" alt="{\displaystyle K}"></span> consists of the <a href="/wiki/Complete_metric_space#Completion" title="Complete metric space">completion</a> of the space of functions spanned by <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left\{K_{x}\mid x\in X\right\}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow> <mo>{</mo> <mrow> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>x</mi> </mrow> </msub> <mo>∣<!-- ∣ --></mo> <mi>x</mi> <mo>∈<!-- ∈ --></mo> <mi>X</mi> </mrow> <mo>}</mo> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \left\{K_{x}\mid x\in X\right\}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9f34315e8fa9ab06c178546d78717d473333a80c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:13.558ex; height:2.843ex;" alt="{\displaystyle \left\{K_{x}\mid x\in X\right\}}"></span>: <span class="mwe-math-element"><span class="mwe-math-mathml-inline 
mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle f(x)=\sum _{i=1}^{n}\alpha _{i}K_{x_{i}}(x),\,f\in {\mathcal {H}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>α<!-- α --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mspace width="thinmathspace" /> <mi>f</mi> <mo>∈<!-- ∈ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">H</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle f(x)=\sum _{i=1}^{n}\alpha _{i}K_{x_{i}}(x),\,f\in {\mathcal {H}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5cc9d89fc9f2417ade446b9702813e6841546fb4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:29.958ex; height:3.176ex;" alt="{\textstyle f(x)=\sum _{i=1}^{n}\alpha _{i}K_{x_{i}}(x),\,f\in {\mathcal {H}}}"></span>, where all <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \alpha _{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>α<!-- α --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle \alpha _{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3b1fb627423abe4988b7ed88d4920bf1ec074790" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.287ex; height:2.009ex;" alt="{\displaystyle \alpha _{i}}"></span> are real numbers. Some commonly used kernels include the linear kernel, inducing the space of linear functions: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=x^{\mathsf {T}}z,}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <msup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>z</mi> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=x^{\mathsf {T}}z,}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1752cc7e745ef657aa006a042a85bfda4928e4fc" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:14.842ex; height:3.176ex;" alt="{\displaystyle K(x,z)=x^{\mathsf {T}}z,}"></span> the polynomial kernel, inducing the space of polynomial functions of order <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d}</annotation> 
</semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e85ff03cbe0c7341af6b982e47e9f90d235c66ab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.216ex; height:2.176ex;" alt="{\displaystyle d}"></span>: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=\left(x^{\mathsf {T}}z+1\right)^{d},}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>z</mi> <mo>+</mo> <mn>1</mn> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=\left(x^{\mathsf {T}}z+1\right)^{d},}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5581e7d68e6194c454b69aefd3508a5e7ffc534f" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:22.066ex; height:3.843ex;" alt="{\displaystyle K(x,z)=\left(x^{\mathsf {T}}z+1\right)^{d},}"></span> and the Gaussian kernel: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=e^{-{\left\|x-z\right\|^{2}}/{\sigma ^{2}}}.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo 
stretchy="false">)</mo> <mo>=</mo> <msup> <mi>e</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>x</mi> <mo>−<!-- − --></mo> <mi>z</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>/</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mi>σ<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> </mrow> </mrow> </msup> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=e^{-{\left\|x-z\right\|^{2}}/{\sigma ^{2}}}.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d0e273291e98b7c72921633d24dec95af2ce7f27" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:21.725ex; height:3.676ex;" alt="{\displaystyle K(x,z)=e^{-{\left\|x-z\right\|^{2}}/{\sigma ^{2}}}.}"></span> </p><p>Note that for an arbitrary loss function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle V}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>V</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle V}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/af0f6064540e84211d0ffe4dac72098adfa52845" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.787ex; height:2.176ex;" alt="{\displaystyle V}"></span>, this approach defines a general class of algorithms named Tikhonov regularization. 
For instance, using the <a href="/wiki/Hinge_loss" title="Hinge loss">hinge loss</a> leads to the <a href="/wiki/Support_vector_machine" title="Support vector machine">support vector machine</a> algorithm, and using the <a href="/w/index.php?title=Epsilon-insensitive_loss&action=edit&redlink=1" class="new" title="Epsilon-insensitive loss (page does not exist)">epsilon-insensitive loss</a> leads to <a href="/wiki/Support_vector_regression" class="mw-redirect" title="Support vector regression">support vector regression</a>. </p> <div class="mw-heading mw-heading3"><h3 id="Arbitrary_kernel">Arbitrary kernel</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=4" title="Edit section: Arbitrary kernel"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The <a href="/wiki/Representer_theorem" title="Representer theorem">representer theorem</a> guarantees that the solution can be written as: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f(x)=\sum _{i=1}^{n}c_{i}K(x_{i},x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f(x)=\sum _{i=1}^{n}c_{i}K(x_{i},x)}</annotation> </semantics> 
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ef823111d610d2d2fbe43352a48f62d004175be6" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:21.433ex; height:6.843ex;" alt="{\displaystyle f(x)=\sum _{i=1}^{n}c_{i}K(x_{i},x)}"></span> for some <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c\in \mathbb {R} ^{n}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c\in \mathbb {R} ^{n}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/dba1a4ded4b663586672ebe0a40b4456844f0029" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.744ex; height:2.343ex;" alt="{\displaystyle c\in \mathbb {R} ^{n}}"></span>. 
</p><p>The minimization problem can be expressed as: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \min _{c\in \mathbb {R} ^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|f\right\|_{H}^{2},}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> </munder> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>K</mi> <mi>c</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>f</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \min _{c\in \mathbb {R} ^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|f\right\|_{H}^{2},}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8db748d08ccdfe2136e0a0434cbcb4efe53324bf" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.171ex; width:29.11ex; height:5.509ex;" alt="{\displaystyle \min _{c\in \mathbb {R} 
^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|f\right\|_{H}^{2},}"></span> where, with some abuse of notation, the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i,j}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> <mo>,</mo> <mi>j</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i,j}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f4cbf8bbc622154cda8208d6e339495fe16a1f9a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.794ex; height:2.509ex;" alt="{\displaystyle i,j}"></span> entry of the kernel matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2b76fce82a62ed5461908f0dc8f037de4e3686b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.066ex; height:2.176ex;" alt="{\displaystyle K}"></span> (as opposed to the kernel function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(\cdot ,\cdot )}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mo>⋅<!-- ⋅ --></mo> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo 
stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(\cdot ,\cdot )}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/37f4b57edf96eb6983558a1d5c38a38d8dd10e40" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.203ex; height:2.843ex;" alt="{\displaystyle K(\cdot ,\cdot )}"></span>) is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x_{i},x_{j})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x_{i},x_{j})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65ef825b9f7c255206d832544df3a5952951f567" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:9.278ex; height:3.009ex;" alt="{\displaystyle K(x_{i},x_{j})}"></span>. 
</p><p>For such a function, <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}\left\|f\right\|_{H}^{2}&=\langle f,f\rangle _{H}\\[1ex]&=\left\langle \sum _{i=1}^{n}c_{i}K(x_{i},\cdot ),\sum _{j=1}^{n}c_{j}K(x_{j},\cdot )\right\rangle _{H}\\[1ex]&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}\left\langle K(x_{i},\cdot ),K(x_{j},\cdot )\right\rangle _{H}\\&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}K(x_{i},x_{j})\\&=c^{\mathsf {T}}Kc,\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="0.73em 0.73em 0.3em 0.3em 0.3em" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>f</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mtd> <mtd> <mi></mi> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>f</mi> <mo>,</mo> <mi>f</mi> <msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> </mrow> </msub> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <msub> <mrow> <mo>⟨</mo> <mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> <mo>,</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> 
<mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> </mrow> <mo>⟩</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> </mrow> </msub> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <msub> <mrow> <mo>⟨</mo> <mrow> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> <mo>,</mo> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>,</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> </mrow> <mo>⟩</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> </mrow> </msub> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> 
</msub> <mi>K</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <msup> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>K</mi> <mi>c</mi> <mo>,</mo> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}\left\|f\right\|_{H}^{2}&=\langle f,f\rangle _{H}\\[1ex]&=\left\langle \sum _{i=1}^{n}c_{i}K(x_{i},\cdot ),\sum _{j=1}^{n}c_{j}K(x_{j},\cdot )\right\rangle _{H}\\[1ex]&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}\left\langle K(x_{i},\cdot ),K(x_{j},\cdot )\right\rangle _{H}\\&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}K(x_{i},x_{j})\\&=c^{\mathsf {T}}Kc,\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a30e14685b6c7600e236a0e04bb08c4f21291d92" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -15.171ex; width:42.305ex; height:31.509ex;" alt="{\displaystyle {\begin{aligned}\left\|f\right\|_{H}^{2}&=\langle f,f\rangle _{H}\\[1ex]&=\left\langle \sum _{i=1}^{n}c_{i}K(x_{i},\cdot ),\sum _{j=1}^{n}c_{j}K(x_{j},\cdot )\right\rangle _{H}\\[1ex]&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}\left\langle K(x_{i},\cdot ),K(x_{j},\cdot )\right\rangle _{H}\\&=\sum _{i=1}^{n}\sum _{j=1}^{n}c_{i}c_{j}K(x_{i},x_{j})\\&=c^{\mathsf {T}}Kc,\end{aligned}}}"></span> </p><p>The following minimization problem can be obtained: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \min _{c\in \mathbb {R} ^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb 
{R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> </munder> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>K</mi> <mi>c</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msup> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>K</mi> <mi>c</mi> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \min _{c\in \mathbb {R} ^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bde11585e44b1dd17abbc8e13623d690a4ba64a1" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.171ex; width:29.246ex; height:5.509ex;" alt="{\displaystyle \min _{c\in \mathbb {R} ^{n}}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc.}"></span> </p><p>As the sum of convex functions is convex, the solution is unique and its minimum can be found by setting the gradient with respect to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math 
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86a67b81c2de995bd608d5b2df50cd8cd7d92455" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.007ex; height:1.676ex;" alt="{\displaystyle c}"></span> to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle 0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle 0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2aae8864a3c1fec9585261791a809ddec1489950" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.162ex; height:2.176ex;" alt="{\displaystyle 0}"></span>: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle -{\frac {1}{n}}K\left(Y-Kc\right)+\lambda Kc=0\Rightarrow K\left(K+\lambda nI\right)c=KY\Rightarrow c=\left(K+\lambda nI\right)^{-1}Y,}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>−<!-- − --></mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <mi>K</mi> <mrow> <mo>(</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>K</mi> <mi>c</mi> </mrow> <mo>)</mo> </mrow> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>K</mi> <mi>c</mi> <mo>=</mo> <mn>0</mn> <mo 
stretchy="false">⇒<!-- ⇒ --></mo> <mi>K</mi> <mrow> <mo>(</mo> <mrow> <mi>K</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mi>c</mi> <mo>=</mo> <mi>K</mi> <mi>Y</mi> <mo stretchy="false">⇒<!-- ⇒ --></mo> <mi>c</mi> <mo>=</mo> <msup> <mrow> <mo>(</mo> <mrow> <mi>K</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> <mi>Y</mi> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle -{\frac {1}{n}}K\left(Y-Kc\right)+\lambda Kc=0\Rightarrow K\left(K+\lambda nI\right)c=KY\Rightarrow c=\left(K+\lambda nI\right)^{-1}Y,}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e9763aed05852dcb3bb69259988143e050bbb33f" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.838ex; width:75.663ex; height:5.176ex;" alt="{\displaystyle -{\frac {1}{n}}K\left(Y-Kc\right)+\lambda Kc=0\Rightarrow K\left(K+\lambda nI\right)c=KY\Rightarrow c=\left(K+\lambda nI\right)^{-1}Y,}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c\in \mathbb {R} ^{n}.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c\in \mathbb {R} ^{n}.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/822b56f6a614794397d2532309ac36abf54fdb80" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" 
style="vertical-align: -0.338ex; width:7.391ex; height:2.343ex;" alt="{\displaystyle c\in \mathbb {R} ^{n}.}"></span> </p> <div class="mw-heading mw-heading4"><h4 id="Complexity">Complexity</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=5" title="Edit section: Complexity"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The complexity of training is essentially the cost of computing the kernel matrix plus the cost of solving the linear system, which is roughly <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(n^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(n^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6b04f5c5cfea38f43406d9442387ad28555e2609" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.032ex; height:3.176ex;" alt="{\displaystyle O(n^{3})}"></span>. 
The computation of the kernel matrix for the linear or <a href="/wiki/Gaussian_kernel" class="mw-redirect" title="Gaussian kernel">Gaussian kernel</a> is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(n^{2}D)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>n</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mi>D</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(n^{2}D)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2318a1a872d03c89a1a97536fe8c702243219e1a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.956ex; height:3.176ex;" alt="{\displaystyle O(n^{2}D)}"></span>. The complexity of testing is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(n)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(n)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/34109fe397fdcff370079185bfdb65826cb5565a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.977ex; height:2.843ex;" alt="{\displaystyle O(n)}"></span>. 
</p> <div class="mw-heading mw-heading3"><h3 id="Prediction">Prediction</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=6" title="Edit section: Prediction"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The prediction at a new test point <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{*}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{*}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/54181bafc8b3401909a790683476457984f484ba" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.293ex; margin-bottom: -0.379ex; width:2.384ex; height:2.009ex;" alt="{\displaystyle x_{*}}"></span> is: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f(x_{*})=\sum _{i=1}^{n}c_{i}K(x_{i},x_{*})=K(X,X_{*})^{\mathsf {T}}c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mi>K</mi> <mo stretchy="false">(</mo> 
<msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <mi>K</mi> <mo stretchy="false">(</mo> <mi>X</mi> <mo>,</mo> <msub> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> <msup> <mo stretchy="false">)</mo> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f(x_{*})=\sum _{i=1}^{n}c_{i}K(x_{i},x_{*})=K(X,X_{*})^{\mathsf {T}}c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/55cdd5ed44773888fdc5f0d93c75a3a5a9772b51" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:38.866ex; height:6.843ex;" alt="{\displaystyle f(x_{*})=\sum _{i=1}^{n}c_{i}K(x_{i},x_{*})=K(X,X_{*})^{\mathsf {T}}c}"></span> </p> <div class="mw-heading mw-heading3"><h3 id="Linear_kernel">Linear kernel</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=7" title="Edit section: Linear kernel"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>For convenience a vector notation is introduced. 
Let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/68baa052181f707c662844a465bfeeb135e82bab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.98ex; height:2.176ex;" alt="{\displaystyle X}"></span> be an <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n\times d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>×<!-- × --></mo> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n\times d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/965fa37801cd698e5084f9d0c1c8f8d772ce3934" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.451ex; height:2.176ex;" alt="{\displaystyle n\times d}"></span> matrix, where the rows are input vectors, and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle Y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>Y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle Y}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/961d67d6b454b4df2301ac571808a3538b3a6d3f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.171ex; width:1.773ex; height:2.009ex;" alt="{\displaystyle Y}"></span> a <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n\times 1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>×<!-- × --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n\times 1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d24148f103e1cccb60addeeb0a64cb1c3d5622e0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.398ex; height:2.176ex;" alt="{\displaystyle n\times 1}"></span> vector where the entries are corresponding outputs. 
In terms of vectors, the kernel matrix can be written as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K=XX^{\mathsf {T}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo>=</mo> <mi>X</mi> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K=XX^{\mathsf {T}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fa037e04fe822a6534f5da1b88ca863a528ea530" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:10.493ex; height:2.676ex;" alt="{\displaystyle K=XX^{\mathsf {T}}}"></span>. The learning function can be written as: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f(x_{*})=K_{x_{*}}c=x_{*}^{\mathsf {T}}X^{\mathsf {T}}c=x_{*}^{\mathsf {T}}w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> <mo stretchy="false">)</mo> <mo>=</mo> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msub> </mrow> </msub> <mi>c</mi> <mo>=</mo> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> 
<mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> <mo>=</mo> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f(x_{*})=K_{x_{*}}c=x_{*}^{\mathsf {T}}X^{\mathsf {T}}c=x_{*}^{\mathsf {T}}w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3fb83005e7f20351cc4df1a67b9f8a4396213a8d" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.583ex; margin-bottom: -0.255ex; width:31.133ex; height:3.176ex;" alt="{\displaystyle f(x_{*})=K_{x_{*}}c=x_{*}^{\mathsf {T}}X^{\mathsf {T}}c=x_{*}^{\mathsf {T}}w}"></span> </p><p>Here we define <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w=X^{\mathsf {T}}c,w\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> <mo>=</mo> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> <mo>,</mo> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w=X^{\mathsf {T}}c,w\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bead8078ea5d23b21670a2dad3870f5a18d76aa4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:17.427ex; height:3.009ex;" 
alt="{\displaystyle w=X^{\mathsf {T}}c,w\in \mathbb {R} ^{d}}"></span>. The objective function can be rewritten as: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc&={\frac {1}{n}}\left\|y-XX^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}XX^{\mathsf {T}}c\\[1ex]&={\frac {1}{n}}\left\|y-Xw\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|w\right\|_{\mathbb {R} ^{d}}^{2}\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="0.73em 0.3em" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>K</mi> <mi>c</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msup> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>K</mi> <mi>c</mi> </mtd> <mtd> <mi></mi> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> 
<mi>c</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msup> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc&={\frac {1}{n}}\left\|y-XX^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}XX^{\mathsf {T}}c\\[1ex]&={\frac {1}{n}}\left\|y-Xw\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|w\right\|_{\mathbb {R} ^{d}}^{2}\end{aligned}}}</annotation> </semantics> 
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f2ff11be5ddb32836b114fe2c8c838d64157777b" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -5.338ex; width:58.262ex; height:11.843ex;" alt="{\displaystyle {\begin{aligned}{\frac {1}{n}}\left\|Y-Kc\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}Kc&={\frac {1}{n}}\left\|y-XX^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}XX^{\mathsf {T}}c\\[1ex]&={\frac {1}{n}}\left\|y-Xw\right\|_{\mathbb {R} ^{n}}^{2}+\lambda \left\|w\right\|_{\mathbb {R} ^{d}}^{2}\end{aligned}}}"></span> </p><p>The first term is the objective function from <a href="/wiki/Ordinary_least_squares" title="Ordinary least squares">ordinary least squares</a> (OLS) regression, corresponding to the <a href="/wiki/Residual_sum_of_squares" title="Residual sum of squares">residual sum of squares</a>. The second term is a regularization term, not present in OLS, which penalizes large <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> values. As a smooth finite-dimensional problem is considered, it is possible to apply standard calculus tools. 
In order to minimize the objective function, the gradient is calculated with respect to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> and set to zero: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}Xw-X^{\mathsf {T}}y+\lambda nw=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mi>w</mi> <mo>−<!-- − --></mo> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>y</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>w</mi> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}Xw-X^{\mathsf {T}}y+\lambda nw=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ad0270b5e401ee4065d0d5813fdfa9b07d94b37c" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:25.852ex; height:3.009ex;" alt="{\displaystyle X^{\mathsf 
{T}}Xw-X^{\mathsf {T}}y+\lambda nw=0}"></span> <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}X^{\mathsf {T}}y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> <mo>=</mo> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}X^{\mathsf {T}}y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/96cf4516af5baf160368d5ec6d39ef52a09ad681" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:25.819ex; height:3.843ex;" alt="{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}X^{\mathsf {T}}y}"></span> </p><p>This solution closely resembles that of standard linear regression, with an extra term <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda nI}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda nI}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/77ffefe61cc0809f77300eab5bb95c12d0b8e264" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:3.922ex; height:2.176ex;" alt="{\displaystyle \lambda nI}"></span>. If the assumptions of OLS regression hold, the solution <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w=\left(X^{\mathsf {T}}X\right)^{-1}X^{\mathsf {T}}y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> <mo>=</mo> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w=\left(X^{\mathsf {T}}X\right)^{-1}X^{\mathsf {T}}y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8a4d9ff2a832f72925ff760805cb74d0369e3d7d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:19.057ex; height:3.843ex;" alt="{\displaystyle w=\left(X^{\mathsf {T}}X\right)^{-1}X^{\mathsf {T}}y}"></span>, with <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda =0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> 
<annotation encoding="application/x-tex">{\displaystyle \lambda =0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/00c4bba30544017fe76932de5a4e25adb5512d95" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.616ex; height:2.176ex;" alt="{\displaystyle \lambda =0}"></span>, is an unbiased estimator, and is the minimum-variance linear unbiased estimator, according to the <a href="/wiki/Gauss%E2%80%93Markov_theorem" title="Gauss–Markov theorem">Gauss–Markov theorem</a>. The term <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda nI}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda nI}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/77ffefe61cc0809f77300eab5bb95c12d0b8e264" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:3.922ex; height:2.176ex;" alt="{\displaystyle \lambda nI}"></span> therefore leads to a biased solution; however, it also tends to reduce variance. 
This is easy to see, as the <a href="/wiki/Covariance" title="Covariance">covariance</a> matrix of the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>-values is proportional to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6be2107328283bcd69aa8560cf824e8d3171819f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:16.553ex; height:3.843ex;" alt="{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}"></span>, and therefore large 
values of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b43d0ea3c9c025af1be9128e62a18fa74bedda2a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.355ex; height:2.176ex;" alt="{\displaystyle \lambda }"></span> will lead to lower variance. Therefore, manipulating <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b43d0ea3c9c025af1be9128e62a18fa74bedda2a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.355ex; height:2.176ex;" alt="{\displaystyle \lambda }"></span> corresponds to trading off bias and variance. 
For problems with high-variance <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> estimates, such as cases with relatively small <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> or with correlated regressors, the optimal prediction accuracy may be obtained by using a nonzero <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda }</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b43d0ea3c9c025af1be9128e62a18fa74bedda2a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.355ex; height:2.176ex;" alt="{\displaystyle \lambda }"></span>, and thus introducing some bias to reduce variance. Furthermore, it is not uncommon in <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a> to have cases where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n&lt;d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>&lt;</mo> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n&lt;d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fb24d50a959320f2b673f848b6195d1a6ddf0dba" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.709ex; height:2.176ex;" alt="{\displaystyle n&lt;d}"></span>, in which case <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75a4cb7b8bf2de8f4b677bee7ac95cd1fd3ef35f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: 
-0.338ex; width:5.328ex; height:2.676ex;" alt="{\displaystyle X^{\mathsf {T}}X}"></span> is <a href="/wiki/Rank_(linear_algebra)" title="Rank (linear algebra)">rank</a>-deficient, and a nonzero <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b43d0ea3c9c025af1be9128e62a18fa74bedda2a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.355ex; height:2.176ex;" alt="{\displaystyle \lambda }"></span> is necessary to compute <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6be2107328283bcd69aa8560cf824e8d3171819f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; 
width:16.553ex; height:3.843ex;" alt="{\displaystyle \left(X^{\mathsf {T}}X+\lambda nI\right)^{-1}}"></span>. </p> <div class="mw-heading mw-heading4"><h4 id="Complexity_2">Complexity</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=8" title="Edit section: Complexity"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The parameter <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b43d0ea3c9c025af1be9128e62a18fa74bedda2a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.355ex; height:2.176ex;" alt="{\displaystyle \lambda }"></span> controls the invertibility of the matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X+\lambda nI}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X+\lambda nI}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/348dd38744289e33f8f4527516787220333019b5" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:12.09ex; height:2.843ex;" alt="{\displaystyle X^{\mathsf {T}}X+\lambda nI}"></span>. Several methods can be used to solve the above linear system, <a href="/wiki/Cholesky_decomposition" title="Cholesky decomposition">Cholesky decomposition</a> being probably the method of choice, since the matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X+\lambda nI}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>n</mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X+\lambda nI}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/348dd38744289e33f8f4527516787220333019b5" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:12.09ex; height:2.843ex;" alt="{\displaystyle X^{\mathsf {T}}X+\lambda nI}"></span> is <a href="/wiki/Symmetric" class="mw-redirect" title="Symmetric">symmetric</a> and <a href="/wiki/Positive_definite" class="mw-redirect" title="Positive definite">positive definite</a>. 
The complexity of this method is <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(nD^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <msup> <mi>D</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(nD^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ee01ae2dbbb6b2dd27e05c6e98216addf8a8264" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.956ex; height:3.176ex;" alt="{\displaystyle O(nD^{2})}"></span> for training and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(D)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>D</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(D)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9d87211a0f49d38f36a0137820d235a4d84a2b79" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.507ex; height:2.843ex;" alt="{\displaystyle O(D)}"></span> for testing. 
The cost <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(nD^{2})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <mi>n</mi> <msup> <mi>D</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(nD^{2})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ee01ae2dbbb6b2dd27e05c6e98216addf8a8264" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:7.956ex; height:3.176ex;" alt="{\displaystyle O(nD^{2})}"></span> is essentially that of computing <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75a4cb7b8bf2de8f4b677bee7ac95cd1fd3ef35f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.328ex; height:2.676ex;" alt="{\displaystyle X^{\mathsf {T}}X}"></span>, whereas the inverse computation (or rather the solution of the linear system) is roughly <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math 
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle O(D^{3})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>O</mi> <mo stretchy="false">(</mo> <msup> <mi>D</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>3</mn> </mrow> </msup> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle O(D^{3})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/dc92ac9169edc5fea109f96b0628655dcefe05b2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.561ex; height:3.176ex;" alt="{\displaystyle O(D^{3})}"></span>. </p> <div class="mw-heading mw-heading2"><h2 id="Feature_maps_and_Mercer's_theorem"><span id="Feature_maps_and_Mercer.27s_theorem"></span>Feature maps and Mercer's theorem</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=9" title="Edit section: Feature maps and Mercer's theorem"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In this section it will be shown how to extend RLS to any kind of reproducing kernel K. 
Instead of a linear kernel, consider a feature map <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Phi :X\to F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mo>:</mo> <mi>X</mi> <mo stretchy="false">→<!-- → --></mo> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Phi :X\to F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/67b65ec6a5c3b73a64eb0ea24a0644201752dc81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:10.95ex; height:2.176ex;" alt="{\displaystyle \Phi :X\to F}"></span> for some Hilbert space <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span>, called the feature space. 
In this case the kernel is defined as the inner product of the feature maps in the feature space, as given in the displayed equation below. The matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/68baa052181f707c662844a465bfeeb135e82bab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.98ex; height:2.176ex;" alt="{\displaystyle X}"></span> is now replaced by the new data matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Phi }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi mathvariant="normal">Φ<!-- Φ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Phi }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/aed80a2011a3912b028ba32a52dfa57165455f24" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.678ex; height:2.176ex;" alt="{\displaystyle \Phi }"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \Phi _{ij}=\varphi _{j}(x_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mi>j</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>φ<!-- φ --></mi> <mrow 
class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \Phi _{ij}=\varphi _{j}(x_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e9918ce545aa718de6531df0dab425727fe7c2ff" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:12.622ex; height:3.009ex;" alt="{\displaystyle \Phi _{ij}=\varphi _{j}(x_{i})}"></span>, or the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle j}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>j</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle j}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2f461e54f5c093e92a55547b9764291390f0b5d0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; margin-left: -0.027ex; width:0.985ex; height:2.509ex;" alt="{\displaystyle j}"></span>-th component of the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varphi (x_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varphi (x_{i})}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9bc245634fd0d41fdc9b7c16d7a2362c2c9c2af7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.459ex; height:2.843ex;" alt="{\displaystyle \varphi (x_{i})}"></span>. <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,x')=\langle \Phi (x),\Phi (x')\rangle _{F}.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <msup> <mi>x</mi> <mo>′</mo> </msup> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mo stretchy="false">(</mo> <msup> <mi>x</mi> <mo>′</mo> </msup> <mo stretchy="false">)</mo> <msub> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>F</mi> </mrow> </msub> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,x')=\langle \Phi (x),\Phi (x')\rangle _{F}.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5b04b870c868ed3c25011cab63f6169fa074da29" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:26.624ex; height:3.009ex;" alt="{\displaystyle K(x,x')=\langle \Phi (x),\Phi (x')\rangle _{F}.}"></span> It means that for a given training set <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K=\Phi \Phi ^{\mathsf {T}}}"> <semantics> <mrow 
class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo>=</mo> <mi mathvariant="normal">Φ<!-- Φ --></mi> <msup> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K=\Phi \Phi ^{\mathsf {T}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0eb4846f57c7e829c013f3ff4143118eae198eac" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:9.872ex; height:2.676ex;" alt="{\displaystyle K=\Phi \Phi ^{\mathsf {T}}}"></span>. Thus, the objective function can be written as <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \min _{c\in \mathbb {R} ^{n}}\left\|Y-\Phi \Phi ^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}\Phi \Phi ^{\mathsf {T}}c.}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> </munder> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi mathvariant="normal">Φ<!-- Φ --></mi> <msup> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow 
class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msup> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi mathvariant="normal">Φ<!-- Φ --></mi> <msup> <mi mathvariant="normal">Φ<!-- Φ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>c</mi> <mo>.</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \min _{c\in \mathbb {R} ^{n}}\left\|Y-\Phi \Phi ^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}\Phi \Phi ^{\mathsf {T}}c.}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/25385f7a5ecfb33d2406dac3ef2091fe4ca3f380" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.171ex; width:32.299ex; height:5.009ex;" alt="{\displaystyle \min _{c\in \mathbb {R} ^{n}}\left\|Y-\Phi \Phi ^{\mathsf {T}}c\right\|_{\mathbb {R} ^{n}}^{2}+\lambda c^{\mathsf {T}}\Phi \Phi ^{\mathsf {T}}c.}"></span> </p><p>This approach is known as the <a href="/wiki/Kernel_trick" class="mw-redirect" title="Kernel trick">kernel trick</a>. This technique can significantly simplify the computational operations. 
If <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span> is high dimensional, computing <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varphi (x_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varphi (x_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9bc245634fd0d41fdc9b7c16d7a2362c2c9c2af7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.459ex; height:2.843ex;" alt="{\displaystyle \varphi (x_{i})}"></span> may be rather intensive. 
If the explicit form of the kernel function is known, we just need to compute and store the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n\times n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>×<!-- × --></mo> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n\times n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/59d2b4cb72e304526cf5b5887147729ea259da78" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.63ex; height:1.676ex;" alt="{\displaystyle n\times n}"></span> kernel matrix <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2b76fce82a62ed5461908f0dc8f037de4e3686b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.066ex; height:2.176ex;" alt="{\displaystyle K}"></span>. 
</p><p>In fact, the <a href="/wiki/Hilbert_space" title="Hilbert space">Hilbert space</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span> need not be isomorphic to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \mathbb {R} ^{m}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>m</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \mathbb {R} ^{m}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6a87a024931038d1858dc22e8a194e5978c3412e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:3.353ex; height:2.343ex;" alt="{\displaystyle \mathbb {R} ^{m}}"></span>, and can be infinite dimensional. 
This follows from <a href="/wiki/Mercer%27s_theorem" title="Mercer's theorem">Mercer's theorem</a>, which states that a continuous, symmetric, positive definite kernel function can be expressed as <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=\sum _{i=1}^{\infty }\sigma _{i}e_{i}(x)e_{i}(z)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="normal">∞<!-- ∞ --></mi> </mrow> </munderover> <msub> <mi>σ<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>e</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <msub> <mi>e</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>z</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=\sum _{i=1}^{\infty }\sigma _{i}e_{i}(x)e_{i}(z)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/95efa5c7f8388a1775dbfd2e6406e022198e7fd2" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.005ex; width:26.097ex; height:6.843ex;" alt="{\displaystyle K(x,z)=\sum _{i=1}^{\infty }\sigma _{i}e_{i}(x)e_{i}(z)}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle e_{i}(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle 
displaystyle="true" scriptlevel="0"> <msub> <mi>e</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle e_{i}(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/85ad5964c832f3fc0454ad4c35ebc5454e732d24" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.022ex; height:2.843ex;" alt="{\displaystyle e_{i}(x)}"></span> form an <a href="/wiki/Orthonormal_basis" title="Orthonormal basis">orthonormal basis</a> for <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell ^{2}(X)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">(</mo> <mi>X</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell ^{2}(X)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/21cf0070a0d75cc0906a9a5d37e2f20b029b4adb" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.813ex; height:3.176ex;" alt="{\displaystyle \ell ^{2}(X)}"></span>, and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sigma _{i}\in \mathbb {R} }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>σ<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>∈<!-- ∈ --></mo> <mrow 
class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sigma _{i}\in \mathbb {R} }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b81bb8a194c095aa0dd3e45d08819cf5b5063209" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.646ex; height:2.509ex;" alt="{\displaystyle \sigma _{i}\in \mathbb {R} }"></span>. If a feature map <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varphi (x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varphi (x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4c4046f1f2de7df04bde418ba2bc4d3898ac2385" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.659ex; height:2.843ex;" alt="{\displaystyle \varphi (x)}"></span> is defined with components <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varphi _{i}(x)={\sqrt {\sigma _{i}}}e_{i}(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>φ<!-- φ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <msqrt> <msub> <mi>σ<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </msqrt> </mrow> 
<msub> <mi>e</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varphi _{i}(x)={\sqrt {\sigma _{i}}}e_{i}(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3417500ed5c7e323fc898fc356eb373313efd960" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.171ex; width:17.642ex; height:3.176ex;" alt="{\displaystyle \varphi _{i}(x)={\sqrt {\sigma _{i}}}e_{i}(x)}"></span>, it follows that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo fence="false" stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c366a1bc35dff7e2336caf0be5ad0baa2be2333e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:22.345ex; height:2.843ex;" alt="{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }"></span>. 
This demonstrates that any kernel can be associated with a feature map, and that RLS generally consists of linear RLS performed in some possibly higher-dimensional feature space. While Mercer's theorem shows how one feature map can be associated with a kernel, in fact multiple feature maps can be associated with a given reproducing kernel. For instance, the map <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \varphi (x)=K_{x}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <msub> <mi>K</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>x</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \varphi (x)=K_{x}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b825c5a404bd8714dc4d847fc908113357710da4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:10.903ex; height:2.843ex;" alt="{\displaystyle \varphi (x)=K_{x}}"></span> satisfies the property <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>K</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo>,</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mo fence="false" stretchy="false">⟨<!-- ⟨ --></mo> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>,</mo> <mi>φ<!-- φ --></mi> <mo stretchy="false">(</mo> <mi>z</mi> <mo stretchy="false">)</mo> <mo fence="false" 
stretchy="false">⟩<!-- ⟩ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c366a1bc35dff7e2336caf0be5ad0baa2be2333e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:22.345ex; height:2.843ex;" alt="{\displaystyle K(x,z)=\langle \varphi (x),\varphi (z)\rangle }"></span> for an arbitrary reproducing kernel. </p> <div class="mw-heading mw-heading2"><h2 id="Bayesian_interpretation">Bayesian interpretation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=10" title="Edit section: Bayesian interpretation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">Further information: <a href="/wiki/Bayesian_linear_regression" title="Bayesian linear regression">Bayesian linear regression</a> and <a href="/wiki/Bayesian_interpretation_of_kernel_regularization" title="Bayesian interpretation of kernel regularization">Bayesian interpretation of kernel regularization</a></div> <p>Least squares can be viewed as a likelihood maximization under an assumption of normally distributed residuals. 
This is because the exponent of the <a href="/wiki/Gaussian_distribution" class="mw-redirect" title="Gaussian distribution">Gaussian distribution</a> is quadratic in the data, and so is the least-squares objective function. In this framework, the regularization terms of RLS can be understood to be encoding <a href="/wiki/Prior_distribution" class="mw-redirect" title="Prior distribution">priors</a> on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup> For instance, Tikhonov regularization corresponds to a normally distributed prior on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" 
alt="{\displaystyle w}"></span> that is centered at 0. To see this, first note that the OLS objective is proportional to the <a href="/wiki/Log-likelihood" class="mw-redirect" title="Log-likelihood">log-likelihood</a> function when each sampled <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y^{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y^{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a5fa6105fe60e4517be44d6a20659e00faade091" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.96ex; height:3.009ex;" alt="{\displaystyle y^{i}}"></span> is normally distributed around <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w^{\mathsf {T}}\cdot x^{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mo>⋅<!-- ⋅ --></mo> <msup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w^{\mathsf {T}}\cdot x^{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b4fff1be812f9ef3ed3de1e2c3fa65e3e7eab842" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.824ex; height:2.676ex;" alt="{\displaystyle 
w^{\mathsf {T}}\cdot x^{i}}"></span>. Then observe that a normal prior on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> centered at 0 has a log-probability of the form<span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \log P(w)=q-\alpha \sum _{j=1}^{d}w_{j}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>log</mi> <mo>⁡<!-- --></mo> <mi>P</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mi>q</mi> <mo>−<!-- − --></mo> <mi>α<!-- α --></mi> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \log P(w)=q-\alpha \sum _{j=1}^{d}w_{j}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e4c8416c34045f5b489eeb6d45de2e64b0b82a4c" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: 
-3.338ex; width:23.921ex; height:7.676ex;" alt="{\displaystyle \log P(w)=q-\alpha \sum _{j=1}^{d}w_{j}^{2}}"></span>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle q}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>q</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle q}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/06809d64fa7c817ffc7e323f85997f783dbdf71d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.07ex; height:2.009ex;" alt="{\displaystyle q}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \alpha }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>α<!-- α --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \alpha }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b79333175c8b3f0840bfb4ec41b8072c83ea88d3" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.488ex; height:1.676ex;" alt="{\displaystyle \alpha }"></span> are constants that depend on the variance of the prior and are independent of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> 
</semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>. Thus, maximizing the logarithm of the likelihood times the prior is equivalent to minimizing the sum of the OLS loss function and the ridge regression regularization term. </p><p>This gives a more intuitive interpretation for why <a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a> leads to a unique solution to the least-squares problem: there are infinitely many vectors <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> satisfying the constraints obtained from the data, but since we come to the problem with a prior belief that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> is normally distributed around the origin, we will end up choosing a solution with this constraint in mind. </p><p>Other regularization methods correspond to different priors. See the <a class="mw-selflink-fragment" href="#List_of_RLS_methods">list</a> below for more details. </p> <div class="mw-heading mw-heading2"><h2 id="Specific_examples">Specific examples</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=11" title="Edit section: Specific examples"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading3"><h3 id="Ridge_regression_(or_Tikhonov_regularization)"><span id="Ridge_regression_.28or_Tikhonov_regularization.29"></span>Ridge regression (or Tikhonov regularization)<span class="anchor" id="Ridge_regression"></span><span class="anchor" id="Tikhonov_regularization"></span></h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=12" title="Edit section: Ridge regression (or Tikhonov regularization)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/Ridge_regression" title="Ridge regression">Ridge regression (or Tikhonov regularization)</a></div> <p>One particularly common choice for the penalty function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math 
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4b0bfb3769bf24d80e15374dc37b0441e2616e33" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.764ex; height:2.176ex;" alt="{\displaystyle R}"></span> is the squared <a href="/wiki/L2_norm" class="mw-redirect" title="L2 norm"><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell _{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell _{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/85a4571ee9be10bd3c9df2480ab3d280f99e801a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.024ex; height:2.509ex;" alt="{\displaystyle \ell _{2}}"></span> norm</a>, i.e., <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R(w)=\sum _{j=1}^{d}w_{j}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow 
class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R(w)=\sum _{j=1}^{d}w_{j}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/09dce37d2109830c569426a9ad5706d2943f3f49" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:14.796ex; height:7.676ex;" alt="{\displaystyle R(w)=\sum _{j=1}^{d}w_{j}^{2}}"></span> <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <msup> <mrow> <mo>|</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>|</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">→<!-- → --></mo> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi 
mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> </munder> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/25e8d65a69eec0c70002c930cdcfeefd3e17ed56" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:35.05ex; height:7.676ex;" alt="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"></span> The most common names for this are <a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a> and <a href="/wiki/Ridge_regression" title="Ridge regression">ridge regression</a>. 
It admits a closed-form solution for <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda I\right)^{-1}X^{\mathsf {T}}Y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> <mo>=</mo> <msup> <mrow> <mo>(</mo> <mrow> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> <mo>+</mo> <mi>λ<!-- λ --></mi> <mi>I</mi> </mrow> <mo>)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </msup> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>Y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda I\right)^{-1}X^{\mathsf {T}}Y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8f0af0a01f17ca3df82881bec7f6fcd18248e22e" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; 
width:25.043ex; height:3.843ex;" alt="{\displaystyle w=\left(X^{\mathsf {T}}X+\lambda I\right)^{-1}X^{\mathsf {T}}Y}"></span> The name ridge regression alludes to the fact that the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda I}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda I}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/eb3c18741afe6ce4adee519b4204ba307b6d671b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.527ex; height:2.176ex;" alt="{\displaystyle \lambda I}"></span> term adds positive entries along the diagonal "ridge" of the sample <a href="/wiki/Covariance_matrix" title="Covariance matrix">covariance matrix</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75a4cb7b8bf2de8f4b677bee7ac95cd1fd3ef35f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.328ex; height:2.676ex;" alt="{\displaystyle X^{\mathsf {T}}X}"></span>. 
</p><p>When <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda =0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda =0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/00c4bba30544017fe76932de5a4e25adb5512d95" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.616ex; height:2.176ex;" alt="{\displaystyle \lambda =0}"></span>, i.e., in the case of <a href="/wiki/Ordinary_least_squares" title="Ordinary least squares">ordinary least squares</a>, the condition that <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d>n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>d</mi> <mo>></mo> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d>n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f34c1bdc4a27a81d1877fb48f135d603e8ae6349" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.709ex; height:2.176ex;" alt="{\displaystyle d>n}"></span> causes the sample <a href="/wiki/Covariance_matrix" title="Covariance matrix">covariance matrix</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle X^{\mathsf {T}}X}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" 
scriptlevel="0"> <msup> <mi>X</mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msup> <mi>X</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle X^{\mathsf {T}}X}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75a4cb7b8bf2de8f4b677bee7ac95cd1fd3ef35f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.328ex; height:2.676ex;" alt="{\displaystyle X^{\mathsf {T}}X}"></span> to not have full rank and so it cannot be inverted to yield a unique solution. This is why there can be an infinitude of solutions to the <a href="/wiki/Ordinary_least_squares" title="Ordinary least squares">ordinary least squares</a> problem when <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d>n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>d</mi> <mo>></mo> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d>n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f34c1bdc4a27a81d1877fb48f135d603e8ae6349" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.709ex; height:2.176ex;" alt="{\displaystyle d>n}"></span>. 
However, when <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda >0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mo>></mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda >0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/eea25afc0351140f919cf791c49c1964b8b081de" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.616ex; height:2.176ex;" alt="{\displaystyle \lambda >0}"></span>, i.e., when ridge regression is used, the addition of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda I}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>λ<!-- λ --></mi> <mi>I</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda I}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/eb3c18741afe6ce4adee519b4204ba307b6d671b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.527ex; height:2.176ex;" alt="{\displaystyle \lambda I}"></span> to the sample covariance matrix ensures that all of its eigenvalues will be strictly greater than 0. In other words, it becomes invertible, and the solution becomes unique. </p><p>Compared to ordinary least squares, ridge regression is not unbiased. It accepts bias to reduce variance and the <a href="/wiki/Mean_square_error" class="mw-redirect" title="Mean square error">mean square error</a>. 
</p> <div class="mw-heading mw-heading3"><h3 id="Lasso_regression">Lasso regression</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=13" title="Edit section: Lasso regression"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/Lasso_(statistics)" title="Lasso (statistics)">Lasso (statistics)</a></div> <p>The least absolute shrinkage and selection operator (LASSO) is another popular choice. In <a href="/wiki/Lasso_regression" class="mw-redirect" title="Lasso regression">lasso regression</a>, the lasso penalty function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4b0bfb3769bf24d80e15374dc37b0441e2616e33" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.764ex; height:2.176ex;" alt="{\displaystyle R}"></span> is the <a href="/wiki/L1_norm" class="mw-redirect" title="L1 norm"><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell _{1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle \ell _{1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/361ddd720474aa41cb05453e03424fb7999d3b02" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.024ex; height:2.509ex;" alt="{\displaystyle \ell _{1}}"></span> norm</a>, i.e. <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R(w)=\sum _{j=1}^{d}\left|w_{j}\right|}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> <mo>=</mo> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <mrow> <mo>|</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>|</mo> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R(w)=\sum _{j=1}^{d}\left|w_{j}\right|}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c7a573a81a355e2fb71264db3e912e8b26b06235" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:15.946ex; height:7.676ex;" alt="{\displaystyle R(w)=\sum _{j=1}^{d}\left|w_{j}\right|}"></span> <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}|w_{j}|\rightarrow \min _{w\in \mathbb {R} ^{d}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow 
class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">|</mo> </mrow> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">|</mo> </mrow> <mo stretchy="false">→<!-- → --></mo> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> </munder> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}|w_{j}|\rightarrow \min _{w\in \mathbb {R} ^{d}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/546ead527e52d236196e1bc2f77227140115075c" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:33.996ex; height:7.676ex;" alt="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \sum _{j=1}^{d}|w_{j}|\rightarrow \min _{w\in \mathbb {R} ^{d}}}"></span> </p><p>Note that the lasso penalty function is convex but not strictly convex. 
Unlike <a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a>, this scheme does not have a convenient closed-form solution: instead, the solution is typically found using <a href="/wiki/Quadratic_programming" title="Quadratic programming">quadratic programming</a> or more general <a href="/wiki/Convex_optimization" title="Convex optimization">convex optimization</a> methods, as well as by specific algorithms such as the <a href="/wiki/Least-angle_regression" title="Least-angle regression">least-angle regression</a> algorithm. </p><p>An important difference between lasso regression and Tikhonov regularization is that lasso regression forces more entries of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> to actually equal 0 than would otherwise be the case. 
In contrast, while Tikhonov regularization forces entries of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> to be small, it does not force more of them to be 0 than would be otherwise. Thus, LASSO regularization is more appropriate than Tikhonov regularization in cases in which we expect the number of non-zero entries of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> to be small, and Tikhonov regularization is more appropriate when we expect that entries of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle 
displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> will generally be small but not necessarily zero. Which of these regimes is more relevant depends on the specific data set at hand. </p><p>Besides feature selection described above, LASSO has some limitations. Ridge regression provides better accuracy in the case <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n>d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>></mo> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n>d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/dfe1896db4e754264895da715791d9bd9387b3ce" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.709ex; height:2.176ex;" alt="{\displaystyle n>d}"></span> for highly correlated variables.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> In another case, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n<d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>&lt;</mo> <mi>d</mi> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle n&lt;d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fb24d50a959320f2b673f848b6195d1a6ddf0dba" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.709ex; height:2.176ex;" alt="{\displaystyle n<d}"></span>, LASSO selects at most <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a601995d55609f2d9f5e233e36fbe9ea26011b3b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.395ex; height:1.676ex;" alt="{\displaystyle n}"></span> variables. Moreover, LASSO tends to select some arbitrary variables from a group of highly correlated samples, so there is no grouping effect. 
</p> <div class="mw-heading mw-heading3"><h3 id="ℓ0_Penalization"><span id=".E2.84.930_Penalization"></span><i>ℓ</i><sub>0</sub> Penalization</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=14" title="Edit section: ℓ0 Penalization"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \left\|w_{j}\right\|_{0}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <mi>λ<!-- λ --></mi> <msub> <mrow> <mo symmetric="true">‖</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo stretchy="false">→<!-- → --></mo> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> </munder> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \left\|w_{j}\right\|_{0}\rightarrow \min _{w\in \mathbb {R} ^{d}}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/03cd42569672050801f96905cef145d24e6619ee" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.505ex; width:31.952ex; height:5.843ex;" alt="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda \left\|w_{j}\right\|_{0}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"></span> The most extreme way to enforce sparsity is to say that the actual magnitude of the coefficients of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> does not matter; rather, the only thing that determines the complexity of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span> is the number of non-zero entries. 
This corresponds to setting <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R(w)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> <mo stretchy="false">(</mo> <mi>w</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R(w)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e45c97c0143ecc12a2c8b8e87b5c6ffe1870b065" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.237ex; height:2.843ex;" alt="{\displaystyle R(w)}"></span> to be the <a href="/wiki/L0_norm" class="mw-redirect" title="L0 norm"><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell _{0}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell _{0}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d18f7cb79dd41b63d6aca9ec6b957c225a0aea81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.024ex; height:2.509ex;" alt="{\displaystyle \ell _{0}}"></span> norm</a> of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle w}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>w</mi> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle w}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/88b1e0c8e1be5ebe69d18a8010676fa42d7961e6" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.664ex; height:1.676ex;" alt="{\displaystyle w}"></span>. This regularization function, while attractive for the sparsity that it guarantees, is very difficult to solve because doing so requires optimization of a function that is not even weakly <a href="/wiki/Convex_optimization" title="Convex optimization">convex</a>. Lasso regression is the minimal possible relaxation of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell _{0}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell _{0}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d18f7cb79dd41b63d6aca9ec6b957c225a0aea81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.024ex; height:2.509ex;" alt="{\displaystyle \ell _{0}}"></span> penalization that yields a weakly convex optimization problem. 
</p> <div class="mw-heading mw-heading3"><h3 id="Elastic_net">Elastic net</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=15" title="Edit section: Elastic net"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/Elastic_net_regularization" title="Elastic net regularization">Elastic net regularization</a></div> <p>For any non-negative <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda _{1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda _{1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/571a423bece8f29bcd1b48572f18dd4f6213dce2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.409ex; height:2.509ex;" alt="{\displaystyle \lambda _{1}}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lambda _{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lambda _{2}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6b668a1bd1e8ab9452ca975b7497546e7c1ba187" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.409ex; height:2.509ex;" alt="{\displaystyle \lambda _{2}}"></span> the objective has the following form: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda _{1}\sum _{j=1}^{d}\left|w_{j}\right|+\lambda _{2}\sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo>+</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <mrow> <mo>|</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>|</mo> </mrow> <mo>+</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </munderover> <msup> <mrow> <mo>|</mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> <mo>|</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msup> <mo stretchy="false">→<!-- → 
--></mo> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> </munder> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda _{1}\sum _{j=1}^{d}\left|w_{j}\right|+\lambda _{2}\sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9329e805ea3609e70cf97e0b616c6c68e36a2bd8" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:49.351ex; height:7.676ex;" alt="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}+\lambda _{1}\sum _{j=1}^{d}\left|w_{j}\right|+\lambda _{2}\sum _{j=1}^{d}\left|w_{j}\right|^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}}"></span> </p><p>Let <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \alpha ={\frac {\lambda _{1}}{\lambda _{1}+\lambda _{2}}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>α<!-- α --></mi> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mrow> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \alpha ={\frac {\lambda _{1}}{\lambda _{1}+\lambda _{2}}}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f8332fbd7aa49650072a743c40141669756de4b0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:13.082ex; height:5.843ex;" alt="{\displaystyle \alpha ={\frac {\lambda _{1}}{\lambda _{1}+\lambda _{2}}}}"></span>, then the solution of the minimization problem is described as: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}{\text{ s.t. }}(1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mn>1</mn> <mi>n</mi> </mfrac> </mrow> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mrow> <mi>Y</mi> <mo>−<!-- − --></mo> <mi>X</mi> <mi>w</mi> </mrow> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> <mo stretchy="false">→<!-- → --></mo> <munder> <mo movablelimits="true" form="prefix">min</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>w</mi> <mo>∈<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mrow> </munder> <mrow class="MJX-TeXAtom-ORD"> <mtext> s.t. 
</mtext> </mrow> <mo stretchy="false">(</mo> <mn>1</mn> <mo>−<!-- − --></mo> <mi>α<!-- α --></mi> <mo stretchy="false">)</mo> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>+</mo> <mi>α<!-- α --></mi> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>≤<!-- ≤ --></mo> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}{\text{ s.t. }}(1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c5b9844be1958f2a8424185f6dce589e62124d6c" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.505ex; width:52.119ex; height:5.843ex;" alt="{\displaystyle {\frac {1}{n}}\left\|Y-Xw\right\|_{2}^{2}\rightarrow \min _{w\in \mathbb {R} ^{d}}{\text{ s.t. }}(1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}"></span> for some <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65658b7b223af9e1acc877d848888ecdb4466560" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.84ex; height:2.009ex;" alt="{\displaystyle t}"></span>. 
</p><p>Consider <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <mn>1</mn> <mo>−<!-- − --></mo> <mi>α<!-- α --></mi> <mo stretchy="false">)</mo> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>+</mo> <mi>α<!-- α --></mi> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>≤<!-- ≤ --></mo> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/15cbf308b7b0fe3e32dc978ac8f2d68c1db61e80" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:25.652ex; height:3.009ex;" alt="{\displaystyle (1-\alpha )\left\|w\right\|_{1}+\alpha \left\|w\right\|_{2}\leq t}"></span> as an Elastic Net penalty function. 
</p><p>When <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \alpha =1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>α<!-- α --></mi> <mo>=</mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \alpha =1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/03d67a45a44be8b8f15e99b7def2b0cf0aba1717" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.749ex; height:2.176ex;" alt="{\displaystyle \alpha =1}"></span>, elastic net becomes ridge regression, whereas when <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \alpha =0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>α<!-- α --></mi> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \alpha =0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/30cc00f65bbc630448311dd2dc82e7ce5e90985a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.749ex; height:2.176ex;" alt="{\displaystyle \alpha =0}"></span>, it becomes Lasso. 
<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \forall \alpha \in (0,1]}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi mathvariant="normal">∀<!-- ∀ --></mi> <mi>α<!-- α --></mi> <mo>∈<!-- ∈ --></mo> <mo stretchy="false">(</mo> <mn>0</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">]</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \forall \alpha \in (0,1]}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a30bd3118211ecc4c1e69bf3317464749f4b2211" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:10.531ex; height:2.843ex;" alt="{\displaystyle \forall \alpha \in (0,1]}"></span> Elastic Net penalty function doesn't have the first derivative at 0 and it is strictly convex <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \forall \alpha >0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi mathvariant="normal">∀<!-- ∀ --></mi> <mi>α<!-- α --></mi> <mo>></mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \forall \alpha >0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/85306013a0d13df5769b2d6c208df42c78869689" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:7.041ex; height:2.176ex;" alt="{\displaystyle \forall \alpha >0}"></span> taking the properties both <a href="/wiki/Lasso_regression" class="mw-redirect" title="Lasso regression">lasso regression</a> and <a href="/wiki/Ridge_regression" 
title="Ridge regression">ridge regression</a>. </p><p>One of the main properties of the Elastic Net is that it can select groups of correlated variables. The difference between weight vectors of samples <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e87000dd6142b81d041896a30fe58f0c3acb2158" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.129ex; height:2.009ex;" alt="{\displaystyle x_{i}}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{j}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{j}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5db47cb3d2f9496205a17a6856c91c1d3d363ccd" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:2.239ex; height:2.343ex;" alt="{\displaystyle x_{j}}"></span> is given by: <span class="mwe-math-element"><span class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left|w_{i}^{*}(\lambda _{1},\lambda 
_{2})-w_{j}^{*}(\lambda _{1},\lambda _{2})\right|\leq {\frac {\sum _{i=1}^{n}|y_{i}|}{\lambda _{2}}}{\sqrt {2(1-\rho _{ij})}},}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow> <mo>|</mo> <mrow> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">(</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>−<!-- − --></mo> <msubsup> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mo>∗<!-- ∗ --></mo> </mrow> </msubsup> <mo stretchy="false">(</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo stretchy="false">)</mo> </mrow> <mo>|</mo> </mrow> <mo>≤<!-- ≤ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </munderover> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">|</mo> </mrow> <msub> <mi>y</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">|</mo> </mrow> </mrow> <msub> <mi>λ<!-- λ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> </mfrac> </mrow> <mrow class="MJX-TeXAtom-ORD"> <msqrt> <mn>2</mn> <mo stretchy="false">(</mo> <mn>1</mn> <mo>−<!-- − --></mo> <msub> <mi>ρ<!-- ρ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mi>j</mi> </mrow> </msub> <mo stretchy="false">)</mo> </msqrt> </mrow> <mo>,</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \left|w_{i}^{*}(\lambda _{1},\lambda 
_{2})-w_{j}^{*}(\lambda _{1},\lambda _{2})\right|\leq {\frac {\sum _{i=1}^{n}|y_{i}|}{\lambda _{2}}}{\sqrt {2(1-\rho _{ij})}},}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/98b13aa52686eb269a130ebf3ebdec0acb78fdc1" class="mwe-math-fallback-image-display mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.338ex; width:50.428ex; height:6.343ex;" alt="{\displaystyle \left|w_{i}^{*}(\lambda _{1},\lambda _{2})-w_{j}^{*}(\lambda _{1},\lambda _{2})\right|\leq {\frac {\sum _{i=1}^{n}|y_{i}|}{\lambda _{2}}}{\sqrt {2(1-\rho _{ij})}},}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \rho _{ij}=x_{i}^{\mathsf {T}}x_{j}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ρ<!-- ρ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mi>j</mi> </mrow> </msub> <mo>=</mo> <msubsup> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="sans-serif">T</mi> </mrow> </mrow> </msubsup> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \rho _{ij}=x_{i}^{\mathsf {T}}x_{j}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7547df5a706dd1aa65907967f5268318ae0c6865" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:10.698ex; height:3.176ex;" alt="{\displaystyle \rho _{ij}=x_{i}^{\mathsf {T}}x_{j}}"></span>.<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> </p><p>If <span class="mwe-math-element"><span 
class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{i}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{i}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e87000dd6142b81d041896a30fe58f0c3acb2158" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.129ex; height:2.009ex;" alt="{\displaystyle x_{i}}"></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{j}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{j}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5db47cb3d2f9496205a17a6856c91c1d3d363ccd" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:2.239ex; height:2.343ex;" alt="{\displaystyle x_{j}}"></span> are highly correlated (<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \rho _{ij}\to 1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ρ<!-- ρ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mi>j</mi> </mrow> </msub> <mo stretchy="false">→<!-- → --></mo> <mn>1</mn> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle \rho _{ij}\to 1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a7c096c1bc373802c021f85ccde5ff759966c28e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:7.456ex; height:2.843ex;" alt="{\displaystyle \rho _{ij}\to 1}"></span>), the weight vectors are very close. In the case of negatively correlated samples (<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \rho _{ij}\to -1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ρ<!-- ρ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mi>j</mi> </mrow> </msub> <mo stretchy="false">→<!-- → --></mo> <mo>−<!-- − --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \rho _{ij}\to -1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a6157dbf5aef2fc296a8891bcaa9008780377a34" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:9.264ex; height:2.843ex;" alt="{\displaystyle \rho _{ij}\to -1}"></span>) the samples <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle -x_{j}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>−<!-- − --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle -x_{j}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/20aa1892160b7294a64ee96d0ebbac3e2db3e75b" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:4.048ex; height:2.676ex;" alt="{\displaystyle -x_{j}}"></span> can be taken. To summarize, for highly correlated variables the weight vectors tend to be equal up to a sign in the case of negative correlated variables. </p> <div class="mw-heading mw-heading2"><h2 id="Partial_list_of_RLS_methods">Partial list of RLS methods</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=16" title="Edit section: Partial list of RLS methods"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The following is a list of possible choices of the regularization function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle R(\cdot )}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>R</mi> <mo stretchy="false">(</mo> <mo>⋅<!-- ⋅ --></mo> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle R(\cdot )}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/da034a0978dea21bfca6d4e994482e35f560dfbf" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.22ex; height:2.843ex;" alt="{\displaystyle R(\cdot )}"></span>, along with the name for each one, the corresponding prior if there is a simple one, and ways for computing the solution to the resulting optimization problem. 
</p> <table class="wikitable sortable"> <tbody><tr> <th>Name</th> <th>Regularization function</th> <th>Corresponding prior</th> <th>Methods for solving </th></tr> <tr> <td><a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a></td> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left\|w\right\|_{2}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \left\|w\right\|_{2}^{2}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/945c3302192dcf6de09fe72dc0a85ddebbfa9d28" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:5.043ex; height:3.509ex;" alt="{\displaystyle \left\|w\right\|_{2}^{2}}"></span></td> <td><a href="/wiki/Normal_distribution" title="Normal distribution">Normal</a></td> <td>Closed form </td></tr> <tr> <td><a href="/wiki/Lasso_(statistics)" title="Lasso (statistics)">Lasso regression</a></td> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left\|w\right\|_{1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle 
\left\|w\right\|_{1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5fe56b8011f675610c77a2cd27f827a7b59dc08f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:5.043ex; height:3.009ex;" alt="{\displaystyle \left\|w\right\|_{1}}"></span></td> <td><a href="/wiki/Laplace_distribution" title="Laplace distribution">Laplace</a></td> <td><a href="/wiki/Proximal_gradient_method" title="Proximal gradient method">Proximal gradient descent</a>, <a href="/wiki/Least_angle_regression" class="mw-redirect" title="Least angle regression">least angle regression</a> </td></tr> <tr> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell _{0}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>ℓ<!-- ℓ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell _{0}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d18f7cb79dd41b63d6aca9ec6b957c225a0aea81" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.024ex; height:2.509ex;" alt="{\displaystyle \ell _{0}}"></span> penalization</td> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \left\|w\right\|_{0}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle \left\|w\right\|_{0}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/53b95ee090171fc49f6f26e7c97358da42017730" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:5.043ex; height:3.009ex;" alt="{\displaystyle \left\|w\right\|_{0}}"></span></td> <td>–</td> <td><a href="/wiki/Forward_selection" class="mw-redirect" title="Forward selection">Forward selection</a>, <a href="/wiki/Backward_elimination" class="mw-redirect" title="Backward elimination">Backward elimination</a>, use of priors such as <a href="/w/index.php?title=Spike_and_slab&action=edit&redlink=1" class="new" title="Spike and slab (page does not exist)">spike and slab</a> </td></tr> <tr> <td><a href="/wiki/Elastic_net_regularization" title="Elastic net regularization">Elastic nets</a></td> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \beta \left\|w\right\|_{1}+(1-\beta )\left\|w\right\|_{2}^{2}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>β<!-- β --></mi> <msub> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>+</mo> <mo stretchy="false">(</mo> <mn>1</mn> <mo>−<!-- − --></mo> <mi>β<!-- β --></mi> <mo stretchy="false">)</mo> <msubsup> <mrow> <mo symmetric="true">‖</mo> <mi>w</mi> <mo symmetric="true">‖</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msubsup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \beta \left\|w\right\|_{1}+(1-\beta )\left\|w\right\|_{2}^{2}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9346158535e29587cd8723884c694fccea4de330" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:21.403ex; height:3.509ex;" alt="{\displaystyle \beta \left\|w\right\|_{1}+(1-\beta )\left\|w\right\|_{2}^{2}}"></span></td> <td>Normal and Laplace <a href="/wiki/Mixture_(probability)" title="Mixture (probability)">mixture</a></td> <td><a href="/wiki/Proximal_gradient_method" title="Proximal gradient method">Proximal gradient descent</a> </td></tr> <tr> <td><a href="/wiki/Total_variation_regularization" class="mw-redirect" title="Total variation regularization">Total variation regularization</a></td> <td><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sum _{j=1}^{d-1}\left|w_{j+1}-w_{j}\right|}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munderover> <mo>∑<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>=</mo> <mn>1</mn> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> <mo>−<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mrow> <mo>|</mo> <mrow> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>−<!-- − --></mo> <msub> <mi>w</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>j</mi> </mrow> </msub> </mrow> <mo>|</mo> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sum _{j=1}^{d-1}\left|w_{j+1}-w_{j}\right|}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b52b8e15cceac56977e3f51b4d7f2720d0f97589" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.338ex; width:15.124ex; height:7.676ex;" alt="{\displaystyle \sum _{j=1}^{d-1}\left|w_{j+1}-w_{j}\right|}"></span></td> 
<td>–</td> <td><a href="/w/index.php?title=Split%E2%80%93Bregman_method&action=edit&redlink=1" class="new" title="Split–Bregman method (page does not exist)">Split–Bregman method</a>, among others </td></tr></tbody></table> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=17" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Least_squares" title="Least squares">Least squares</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a> in mathematics.</li> <li><a href="/wiki/Generalization_error" title="Generalization error">Generalization error</a>, one of the reasons regularization is used.</li> <li><a href="/wiki/Tikhonov_regularization" class="mw-redirect" title="Tikhonov regularization">Tikhonov regularization</a></li> <li><a href="/wiki/Lasso_regression" class="mw-redirect" title="Lasso regression">Lasso regression</a></li> <li><a href="/wiki/Elastic_net_regularization" title="Elastic net regularization">Elastic net regularization</a></li> <li><a href="/wiki/Least-angle_regression" title="Least-angle regression">Least-angle regression</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=18" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist 
.references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free 
a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFHuang2022" class="citation journal cs1">Huang, Yunfei; et al. (2022). <a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9755218">"Sparse inference and active learning of stochastic differential equations from data"</a>. <i>Scientific Reports</i>. <b>12</b> (1): 21691. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1038%2Fs41598-022-25638-9">10.1038/s41598-022-25638-9</a></span>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a> <span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9755218">9755218</a></span>. 
<a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a> <a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/36522347">36522347</a>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Scientific+Reports&rft.atitle=Sparse+inference+and+active+learning+of+stochastic+differential+equations+from+data&rft.volume=12&rft.issue=1&rft.pages=21691&rft.date=2022&rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC9755218%23id-name%3DPMC&rft_id=info%3Apmid%2F36522347&rft_id=info%3Adoi%2F10.1038%2Fs41598-022-25638-9&rft.aulast=Huang&rft.aufirst=Yunfei.&rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC9755218&rfr_id=info%3Asid%2Fen.wikipedia.org%3ARegularized+least+squares" class="Z3988"></span></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFTibshirani_Robert1996" class="citation journal cs1">Tibshirani, Robert (1996). <a rel="nofollow" class="external text" href="https://web.stanford.edu/~hastie/Papers/elasticnet.pdf">"Regression shrinkage and selection via the lasso"</a> <span class="cs1-format">(PDF)</span>. <i>Journal of the Royal Statistical Society, Series B</i>. 
<b>58</b>: <i>pp.</i> 266–288.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Journal+of+the+Royal+Statistical+Society%2C+Series+B&rft.atitle=Regression+shrinkage+and+selection+via+the+lasso&rft.volume=58&rft.pages=%27%27pp.%27%27+266-288&rft.date=1996&rft.au=Tibshirani+Robert&rft_id=https%3A%2F%2Fweb.stanford.edu%2F~hastie%2FPapers%2Felasticnet.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3ARegularized+least+squares" class="Z3988"></span></span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHui,_ZouHastie,_Trevor2003" class="citation journal cs1"><a href="/wiki/Zou_Hui" class="mw-redirect" title="Zou Hui">Hui, Zou</a>; Hastie, Trevor (2003). <a rel="nofollow" class="external text" href="https://web.stanford.edu/~hastie/Papers/elasticnet.pdf">"Regularization and Variable Selection via the Elastic Net"</a> <span class="cs1-format">(PDF)</span>. <i>Journal of the Royal Statistical Society, Series B</i>. 
<b>67</b> (2): <i>pp.</i> 301–320.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Journal+of+the+Royal+Statistical+Society%2C+Series+B&rft.atitle=Regularization+and+Variable+Selection+via+the+Elastic+Net&rft.volume=67&rft.issue=2&rft.pages=%27%27pp.%27%27+301-320&rft.date=2003&rft.au=Hui%2C+Zou&rft.au=Hastie%2C+Trevor&rft_id=https%3A%2F%2Fweb.stanford.edu%2F~hastie%2FPapers%2Felasticnet.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3ARegularized+least+squares" class="Z3988"></span></span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Regularized_least_squares&action=edit&section=19" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a rel="nofollow" class="external text" href="http://www.stanford.edu/~hastie/TALKS/enet_talk.pdf">Regularization and Variable Selection via the Elastic Net</a> (presentation)</li> <li><a rel="nofollow" class="external text" href="https://www.mit.edu/~9.520/fall15/slides/class06/class06_RLSSVM.pdf">Regularized Least Squares and Support Vector Machines</a> (presentation)</li> <li><a rel="nofollow" class="external text" href="https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf">Regularized Least Squares</a> (presentation)</li></ul> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐6df7948d6c‐2vfms Cached time: 20241127203021 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.430 seconds Real time usage: 0.654 seconds Preprocessor visited node count: 1448/1000000 Post‐expand include size: 22314/2097152 bytes Template argument size: 190/2097152 bytes Highest expansion depth: 9/100 Expensive parser function count: 8/500 Unstrip 
recursion depth: 1/20 Unstrip post‐expand size: 31031/5000000 bytes Lua time usage: 0.196/10.000 seconds Lua memory usage: 3932745/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 310.990 1 -total 35.39% 110.054 1 Template:Reflist 32.96% 102.492 1 Template:Regression_bar 31.60% 98.274 1 Template:Sidebar 29.50% 91.738 3 Template:Cite_journal 17.68% 54.995 1 Template:Summarize 12.88% 40.064 1 Template:Ambox 6.13% 19.069 1 Template:Further 3.54% 11.006 1 Template:Portal-inline 2.65% 8.236 3 Template:Main --> <!-- Saved in parser cache with key enwiki:pcache:idhash:48803892-0!canonical and timestamp 20241127203021 and revision id 1259548425. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Regularized_least_squares&oldid=1259548425">https://en.wikipedia.org/w/index.php?title=Regularized_least_squares&oldid=1259548425</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Least_squares" title="Category:Least squares">Least squares</a></li><li><a href="/wiki/Category:Linear_algebra" title="Category:Linear algebra">Linear algebra</a></li><li><a href="/wiki/Category:Inverse_problems" title="Category:Inverse problems">Inverse problems</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 25 November 2024, at 
19:39<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li 
id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Regularized_least_squares&mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-6b8d669998-qm7bt","wgBackendResponseTime":153,"wgPageParseReport":{"limitreport":{"cputime":"0.430","walltime":"0.654","ppvisitednodes":{"value":1448,"limit":1000000},"postexpandincludesize":{"value":22314,"limit":2097152},"templateargumentsize":{"value":190,"limit":2097152},"expansiondepth":{"value":9,"limit":100},"expensivefunctioncount":{"value":8,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":31031,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 310.990 1 -total"," 35.39% 110.054 1 Template:Reflist"," 32.96% 102.492 1 Template:Regression_bar"," 31.60% 98.274 1 Template:Sidebar"," 29.50% 91.738 3 Template:Cite_journal"," 17.68% 54.995 1 Template:Summarize"," 12.88% 40.064 1 Template:Ambox"," 6.13% 19.069 1 Template:Further"," 3.54% 11.006 1 Template:Portal-inline"," 2.65% 8.236 3 
Template:Main"]},"scribunto":{"limitreport-timeusage":{"value":"0.196","limit":"10.000"},"limitreport-memusage":{"value":3932745,"limit":52428800},"limitreport-logs":"table#1 {\n}\n"},"cachereport":{"origin":"mw-web.codfw.main-6df7948d6c-2vfms","timestamp":"20241127203021","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Regularized least squares","url":"https:\/\/en.wikipedia.org\/wiki\/Regularized_least_squares","sameAs":"http:\/\/www.wikidata.org\/entity\/Q25304486","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q25304486","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2015-12-13T00:38:54Z","dateModified":"2024-11-25T19:39:30Z"}</script> </body> </html>