<!-- Saved-page residue (moved into a comment so no text precedes the doctype): CINXE.COM — "Residual neural network - Wikipedia" -->
<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Residual neural network - Wikipedia</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"de64cfb3-ad50-486e-ae97-1e2840ffd3fe","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Residual_neural_network","wgTitle":"Residual neural 
network","wgCurRevisionId":1268423920,"wgRevisionId":1268423920,"wgArticleId":55867424,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description matches Wikidata","Neural network architectures","Deep learning"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Residual_neural_network","wgRelevantArticleId":55867424,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false, "wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":30000,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q43744058","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready", 
"user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","ext.math.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","mediawiki.page.media","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming", "ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=ext.cite.styles%7Cext.math.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&amp;only=styles&amp;skin=vector-2022"> <script async="" src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" 
href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.14"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/1200px-ResBlock.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="650"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/800px-ResBlock.png"> <meta property="og:image:width" content="800"> <meta property="og:image:height" content="433"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/640px-ResBlock.png"> <meta property="og:image:width" content="640"> <meta property="og:image:height" content="347"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Residual neural network - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Residual_neural_network"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Residual_neural_network&amp;action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Residual_neural_network"> <link rel="license" 
href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org"> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Residual_neural_network rootpage-Residual_neural_network skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" title="Main menu" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container"
data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li 
id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" 
class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page&#039;s font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div 
class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/?wmf_source=donate&amp;wmf_medium=sidebar&amp;wmf_campaign=en.wikipedia.org&amp;uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&amp;returnto=Residual+neural+network" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&amp;returnto=Residual+neural+network" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. 
[o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/?wmf_source=donate&amp;wmf_medium=sidebar&amp;wmf_campaign=en.wikipedia.org&amp;uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&amp;returnto=Residual+neural+network" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Residual+neural+network" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. 
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Mathematics" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Mathematics"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Mathematics</span> </div> </a> <button aria-controls="toc-Mathematics-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Mathematics subsection</span> </button> <ul id="toc-Mathematics-sublist" class="vector-toc-list"> <li id="toc-Residual_connection" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Residual_connection"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.1</span> <span>Residual connection</span> </div> </a> <ul id="toc-Residual_connection-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Projection_connection" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Projection_connection"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.2</span> <span>Projection connection</span> </div> </a> <ul id="toc-Projection_connection-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Signal_propagation" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Signal_propagation"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.3</span> <span>Signal propagation</span> </div> </a> <ul id="toc-Signal_propagation-sublist" 
class="vector-toc-list"> <li id="toc-Forward_propagation" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Forward_propagation"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.3.1</span> <span>Forward propagation</span> </div> </a> <ul id="toc-Forward_propagation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Backward_propagation" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Backward_propagation"> <div class="vector-toc-text"> <span class="vector-toc-numb">1.3.2</span> <span>Backward propagation</span> </div> </a> <ul id="toc-Backward_propagation-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> </ul> </li> <li id="toc-Variants_of_residual_blocks" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Variants_of_residual_blocks"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Variants of residual blocks</span> </div> </a> <button aria-controls="toc-Variants_of_residual_blocks-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Variants of residual blocks subsection</span> </button> <ul id="toc-Variants_of_residual_blocks-sublist" class="vector-toc-list"> <li id="toc-Basic_block" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Basic_block"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Basic block</span> </div> </a> <ul id="toc-Basic_block-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Bottleneck_block" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Bottleneck_block"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>Bottleneck block</span> </div> </a> <ul id="toc-Bottleneck_block-sublist" class="vector-toc-list"> </ul> 
</li> <li id="toc-Pre-activation_block" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Pre-activation_block"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Pre-activation block</span> </div> </a> <ul id="toc-Pre-activation_block-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Applications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Applications"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Applications</span> </div> </a> <ul id="toc-Applications-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>History</span> </div> </a> <button aria-controls="toc-History-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle History subsection</span> </button> <ul id="toc-History-sublist" class="vector-toc-list"> <li id="toc-Previous_work" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Previous_work"> <div class="vector-toc-text"> <span class="vector-toc-numb">4.1</span> <span>Previous work</span> </div> </a> <ul id="toc-Previous_work-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Degradation_problem" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Degradation_problem"> <div class="vector-toc-text"> <span class="vector-toc-numb">4.2</span> <span>Degradation problem</span> </div> </a> <ul id="toc-Degradation_problem-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Subsequent_work" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Subsequent_work"> 
<div class="vector-toc-text"> <span class="vector-toc-numb">4.3</span> <span>Subsequent work</span> </div> </a> <ul id="toc-Subsequent_work-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" title="Table of Contents" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Residual neural network</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" 
data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 9 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-9" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">9 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Xarxa_neuronal_residual" title="Xarxa neuronal residual – Catalan" lang="ca" hreflang="ca" data-title="Xarxa neuronal residual" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Red_neuronal_residual" title="Red neuronal residual – Spanish" lang="es" hreflang="es" data-title="Red neuronal residual" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%B4%D8%A8%DA%A9%D9%87_%D8%B9%D8%B5%D8%A8%DB%8C_%D8%A8%D8%A7%D9%82%DB%8C%E2%80%8C%D9%85%D8%A7%D9%86%D8%AF%D9%87" title="شبکه عصبی باقی‌مانده – Persian" lang="fa" hreflang="fa" data-title="شبکه عصبی باقی‌مانده" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/R%C3%A9seau_neuronal_r%C3%A9siduel" title="Réseau 
neuronal résiduel – French" lang="fr" hreflang="fr" data-title="Réseau neuronal résiduel" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EC%9E%94%EC%B0%A8_%EC%8B%A0%EA%B2%BD%EB%A7%9D" title="잔차 신경망 – Korean" lang="ko" hreflang="ko" data-title="잔차 신경망" data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/%E6%AE%8B%E5%B7%AE%E3%83%8D%E3%83%83%E3%83%88%E3%83%AF%E3%83%BC%E3%82%AF" title="残差ネットワーク – Japanese" lang="ja" hreflang="ja" data-title="残差ネットワーク" data-language-autonym="日本語" data-language-local-name="Japanese" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%97%D0%B0%D0%BB%D0%B8%D1%88%D0%BA%D0%BE%D0%B2%D0%B0_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D0%B0_%D0%BC%D0%B5%D1%80%D0%B5%D0%B6%D0%B0" title="Залишкова нейронна мережа – Ukrainian" lang="uk" hreflang="uk" data-title="Залишкова нейронна мережа" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E6%AE%98%E5%B7%AE%E7%A5%9E%E7%B6%93%E7%B6%B2%E7%B5%A1" title="殘差神經網絡 – Cantonese" lang="yue" hreflang="yue" data-title="殘差神經網絡" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E6%AE%8B%E5%B7%AE%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C" title="残差神经网络 – Chinese" lang="zh" hreflang="zh" data-title="残差神经网络" 
data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q43744058#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Residual_neural_network" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Residual_neural_network" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div 
id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Residual_neural_network"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div 
class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Residual_neural_network"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Residual_neural_network" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Residual_neural_network" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" 
accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;oldid=1268423920" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Residual_neural_network&amp;id=1268423920&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FResidual_neural_network"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FResidual_neural_network"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Residual_neural_network&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Residual_neural_network&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div 
id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q43744058" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From 
Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Type of artificial neural network</div> <style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">"ResNet" redirects here. For other uses, see <a href="/wiki/ResNet_(disambiguation)" class="mw-disambig" title="ResNet (disambiguation)">ResNet (disambiguation)</a>.</div> <figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:ResBlock.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/220px-ResBlock.png" decoding="async" width="220" height="119" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/330px-ResBlock.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/ba/ResBlock.png/440px-ResBlock.png 2x" data-file-width="1652" data-file-height="895" /></a><figcaption>A residual block in a deep residual network. 
Here, the residual connection skips two layers.</figcaption></figure> <p>A <b>residual neural network</b> (also referred to as a <b>residual network</b> or <b>ResNet</b>)<sup id="cite_ref-resnet_1-0" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> is a <a href="/wiki/Deep_learning" title="Deep learning">deep learning</a> architecture in which the layers learn residual functions with reference to the layer inputs. It was developed in 2015 for <a href="/wiki/Image_recognition" class="mw-redirect" title="Image recognition">image recognition</a>, and won the <a href="/wiki/ImageNet" title="ImageNet">ImageNet</a> Large Scale Visual Recognition Challenge (<a rel="nofollow" class="external text" href="https://image-net.org/challenges/LSVRC/">ILSVRC</a>) of that year.<sup id="cite_ref-ilsvrc2015_2-0" class="reference"><a href="#cite_note-ilsvrc2015-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-imagenet_3-0" class="reference"><a href="#cite_note-imagenet-3"><span class="cite-bracket">&#91;</span>3<span class="cite-bracket">&#93;</span></a></sup> </p><p>As a point of terminology, "residual connection" refers to the specific architectural motif of <span class="avoidwrap" style="display:inline-block;"><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x\mapsto f(x)+x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>x</mi> <mo stretchy="false">&#x21A6;<!-- ↦ --></mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x\mapsto f(x)+x}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c9ce5caa1d71c263eddc969f5501804aad6fd3ec" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:13.531ex; height:2.843ex;" alt="{\displaystyle x\mapsto f(x)+x}"></span></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}"></span> is an arbitrary neural network module. The motif had been used previously (see <a class="mw-selflink-fragment" href="#History">§History</a> for details). However, the publication of ResNet made it widely popular for <a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">feedforward networks</a>, appearing in neural networks that are seemingly unrelated to ResNet. 
</p><p>The residual connection stabilizes the training and convergence of deep neural networks with hundreds of layers, and is a common motif in deep neural networks, such as <a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">transformer</a> models (e.g., <a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a>, and <a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> models such as <a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a>), the <a href="/wiki/AlphaGo_Zero" title="AlphaGo Zero">AlphaGo Zero</a> system, the <a href="/wiki/AlphaStar_(software)" title="AlphaStar (software)">AlphaStar</a> system, and the <a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a> system. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Mathematics">Mathematics</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=1" title="Edit section: Mathematics"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading3"><h3 id="Residual_connection">Residual connection</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=2" title="Edit section: Residual connection"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In a multilayer neural network model, consider a subnetwork with a certain number of stacked layers (e.g., 2 or 3). 
Denote the underlying function performed by this subnetwork as <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle H(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>H</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle H(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/11f241aa7195bebab9d0a3c248ea97ef0c78b1ac" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.203ex; height:2.843ex;" alt="{\displaystyle H(x)}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/87f9e315fd7e2ba406057a97300593c4802b53e4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.33ex; height:1.676ex;" alt="{\displaystyle x}"></span> is the input to the subnetwork. 
Residual learning re-parameterizes this subnetwork and lets the parameter layers represent a "residual function" <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F(x)=H(x)-x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mi>H</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>&#x2212;<!-- − --></mo> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F(x)=H(x)-x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/aec10e785447cb658d486e4b77ee2df9ad004a2e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.351ex; height:2.843ex;" alt="{\displaystyle F(x)=H(x)-x}"></span>. 
The output <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>y</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b8a6208ec717213d4317e666f1ae872e00620a0d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.155ex; height:2.009ex;" alt="{\displaystyle y}"></span> of this subnetwork is then represented as: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y=F(x)+x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>y</mi> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y=F(x)+x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c4aeec957ce6422f097bc7d58e76dcc02a7c9aa3" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:13.304ex; height:2.843ex;" alt="{\displaystyle y=F(x)+x}"></span></dd></dl> <p>The operation of "<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle +\ x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>+</mo> <mtext>&#xA0;</mtext> <mi>x</mi> </mstyle> </mrow> <annotation 
encoding="application/x-tex">{\displaystyle +\ x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/aed4bfe1603ffc721840d5d1094da7d3ce9daabd" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:3.718ex; height:2.176ex;" alt="{\displaystyle +\ x}"></span>" is implemented via a "skip connection" that performs an identity mapping to connect the input of the subnetwork with its output. This connection is referred to as a "residual connection" in later work. The function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/71a82805d469cdfa7856c11d6ee756acd1dc7174" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:4.88ex; height:2.843ex;" alt="{\displaystyle F(x)}"></span> is often represented by matrix multiplication interlaced with <a href="/wiki/Activation_function" title="Activation function">activation functions</a> and normalization operations (e.g., <a href="/wiki/Batch_normalization" title="Batch normalization">batch normalization</a> or <a href="/wiki/Normalization_(machine_learning)#Layer_normalization" title="Normalization (machine learning)">layer normalization</a>). 
As a whole, one of these subnetworks is referred to as a "residual block".<sup id="cite_ref-resnet_1-1" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> A deep residual network is constructed by simply stacking these blocks. </p><p><a href="/wiki/Long_short-term_memory" title="Long short-term memory">Long short-term memory</a> (LSTM) has a memory mechanism that serves as a residual connection.<sup id="cite_ref-lstm1997_4-0" class="reference"><a href="#cite_note-lstm1997-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> In an LSTM without a forget <a href="/wiki/Gating_mechanism" title="Gating mechanism">gate</a>, an input <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f279a30bc8eabc788f3fe81c9cfb674e72e858db" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.156ex; height:2.009ex;" alt="{\displaystyle x_{t}}"></span> is processed by a function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span> and added to a memory cell <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/93578e37f3234419a34df79845836bc0ec5ef76c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.833ex; height:2.009ex;" alt="{\displaystyle c_{t}}"></span>, resulting in <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t+1}=c_{t}+F(x_{t})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t+1}=c_{t}+F(x_{t})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/73d44bdefbabb949b35083ee270ce23b541fab34" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.411ex; height:2.843ex;" alt="{\displaystyle c_{t+1}=c_{t}+F(x_{t})}"></span>. An LSTM with a forget gate essentially functions as a <a href="/wiki/Highway_network" title="Highway network">highway network</a>. </p><p>To stabilize the <a href="/wiki/Variance" title="Variance">variance</a> of the layers' inputs, it is recommended to replace the residual connections <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x+f(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>x</mi> <mo>+</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x+f(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1636566acc3bf415c7ce092c9f727f4094dcad3f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:8.588ex; height:2.843ex;" alt="{\displaystyle x+f(x)}"></span> with <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x/L+f(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mo>/</mo> </mrow> <mi>L</mi> <mo>+</mo> <mi>f</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x/L+f(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/7735805308d2f1ebece5a039970a9e72f5ab45e8" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.333ex; height:2.843ex;" alt="{\displaystyle x/L+f(x)}"></span>, where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle L}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>L</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle L}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/103168b86f781fe6e9a4a87b8ea1cebe0ad4ede8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.583ex; height:2.176ex;" alt="{\displaystyle L}"></span> is the total number of residual layers.<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Projection_connection">Projection connection</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=3" title="Edit section: Projection connection"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>If the function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span> is of type <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F:\mathbb {R} ^{n}\to \mathbb {R} ^{m}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> <mo>:</mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> <mo stretchy="false">&#x2192;<!-- → --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>m</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F:\mathbb {R} ^{n}\to \mathbb {R} ^{m}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ce4e55aadeaa9c838a71166dce4477f4ce9a6ce2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:13.542ex; height:2.343ex;" alt="{\displaystyle F:\mathbb {R} ^{n}\to \mathbb {R} ^{m}}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle n\neq m}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>n</mi> <mo>&#x2260;<!-- ≠ --></mo> <mi>m</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle n\neq m}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3994a24401e2dfabca26e4f36e53097a07a57af5" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" 
style="vertical-align: -0.838ex; width:6.534ex; height:2.676ex;" alt="{\displaystyle n\neq m}"></span>, then <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F(x)+x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F(x)+x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/82306dd17830de33a8bb33a6625322ba317ad6e2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:9.05ex; height:2.843ex;" alt="{\displaystyle F(x)+x}"></span> is undefined. To handle this special case, a projection connection is used: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle y=F(x)+P(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>y</mi> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>P</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle y=F(x)+P(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e51ed489385892b4377872fd523737eb35de8cf9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:16.859ex; height:2.843ex;" alt="{\displaystyle y=F(x)+P(x)}"></span></dd></dl> <p>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" 
style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle P}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>P</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle P}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b4dc73bf40314945ff376bd363916a738548d40a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.745ex; height:2.176ex;" alt="{\displaystyle P}"></span> is typically a linear projection, defined by <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle P(x)=Mx}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>P</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mi>M</mi> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle P(x)=Mx}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9372dbfca0dc4cdd37a9a430d5f38dd74762c85c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.755ex; height:2.843ex;" alt="{\displaystyle P(x)=Mx}"></span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle M}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>M</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle M}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f82cade9898ced02fdd08712e5f0c0151758a0dd" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.442ex; height:2.176ex;" alt="{\displaystyle M}"></span> is an <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle m\times n}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>m</mi> <mo>&#x00D7;<!-- × --></mo> <mi>n</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle m\times n}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/12b23d207d23dd430b93320539abbb0bde84870d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.276ex; height:1.676ex;" alt="{\displaystyle m\times n}"></span> matrix. The matrix is trained via <a href="/wiki/Backpropagation" title="Backpropagation">backpropagation</a>, as is any other parameter of the model. 
</p> <div class="mw-heading mw-heading3"><h3 id="Signal_propagation">Signal propagation</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=4" title="Edit section: Signal propagation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The introduction of identity mappings facilitates signal propagation in both forward and backward paths.<sup id="cite_ref-resnetv2_6-0" class="reference"><a href="#cite_note-resnetv2-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading4"><h4 id="Forward_propagation">Forward propagation</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=5" title="Edit section: Forward propagation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>If the output of the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \ell }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>&#x2113;<!-- ℓ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \ell }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f066e981e530bacc07efc6a10fa82deee985929e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.97ex; height:2.176ex;" alt="{\displaystyle \ell }"></span>-th residual block is the input to the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (\ell 
+1)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (\ell +1)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6e04cef624cd31431d7030278d479e8f19d65a87" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.782ex; height:2.843ex;" alt="{\displaystyle (\ell +1)}"></span>-th residual block (assuming no activation function between blocks), then the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle (\ell +1)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo stretchy="false">(</mo> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle (\ell +1)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6e04cef624cd31431d7030278d479e8f19d65a87" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:6.782ex; height:2.843ex;" alt="{\displaystyle (\ell +1)}"></span>-th input is: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{\ell +1}=F(x_{\ell })+x_{\ell }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <mi>F</mi> <mo 
stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>+</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{\ell +1}=F(x_{\ell })+x_{\ell }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5381463a54dc896c6478ef42b326c51ea16c04ba" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:18.332ex; height:2.843ex;" alt="{\displaystyle x_{\ell +1}=F(x_{\ell })+x_{\ell }}"></span></dd></dl> <p>Applying this formulation recursively, e.g.: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}x_{\ell +2}&amp;=F(x_{\ell +1})+x_{\ell +1}\\&amp;=F(x_{\ell +1})+F(x_{\ell })+x_{\ell }\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>2</mn> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>+</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow 
class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo stretchy="false">)</mo> <mo>+</mo> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo>+</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}x_{\ell +2}&amp;=F(x_{\ell +1})+x_{\ell +1}\\&amp;=F(x_{\ell +1})+F(x_{\ell })+x_{\ell }\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/782f0d61edb959b3388cdaf33dd4cc692ff88d5f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -2.505ex; width:29.822ex; height:6.176ex;" alt="{\displaystyle {\begin{aligned}x_{\ell +2}&amp;=F(x_{\ell +1})+x_{\ell +1}\\&amp;=F(x_{\ell +1})+F(x_{\ell })+x_{\ell }\end{aligned}}}"></span></dd></dl> <p>yields the general relationship: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{L}=x_{\ell }+\sum _{i=\ell }^{L-1}F(x_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> <mo>=</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> <mo>+</mo> <munderover> <mo>&#x2211;<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mi>&#x2113;<!-- ℓ --></mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo 
stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{L}=x_{\ell }+\sum _{i=\ell }^{L-1}F(x_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ed464f3e25e72e1036b88a36a58fc269fda3e60d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -3.171ex; width:20.289ex; height:7.509ex;" alt="{\displaystyle x_{L}=x_{\ell }+\sum _{i=\ell }^{L-1}F(x_{i})}"></span></dd></dl> <p>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle L}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>L</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle L}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8fb88de7e4d31737dae8f02575033272f29e6720" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.583ex; height:2.176ex;" alt="{\textstyle L}"></span> is the index of a residual block and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle \ell }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>&#x2113;<!-- ℓ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle \ell }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/554d626bee80ffdb3ab924a78a2d05c5a9e642db" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.97ex; height:2.176ex;" alt="{\textstyle \ell }"></span> is the index of some earlier 
block. This formulation suggests that there is always a signal that is directly sent from a shallower block <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle \ell }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>&#x2113;<!-- ℓ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle \ell }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/554d626bee80ffdb3ab924a78a2d05c5a9e642db" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.97ex; height:2.176ex;" alt="{\textstyle \ell }"></span> to a deeper block <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle L}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mi>L</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle L}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8fb88de7e4d31737dae8f02575033272f29e6720" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.583ex; height:2.176ex;" alt="{\textstyle L}"></span>. 
</p> <div class="mw-heading mw-heading4"><h4 id="Backward_propagation">Backward propagation</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=6" title="Edit section: Backward propagation"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The residual learning formulation provides the added benefit of mitigating the <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a> to some extent. However, it is crucial to acknowledge that the vanishing gradient issue is not the root cause of the degradation problem, which is tackled through the use of normalization. To observe the effect of residual blocks on backpropagation, consider the partial derivative of a <a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">loss function</a> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\mathcal {E}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\mathcal {E}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/9c298ed828ff778065aeb5f0f305097f55bb9ae0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.311ex; height:2.176ex;" alt="{\displaystyle {\mathcal {E}}}"></span> with respect to some residual block input <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math 
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{\ell }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{\ell }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/3053f1eff42f8fec1d692efa1d5e0eee60c12b30" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.248ex; height:2.009ex;" alt="{\displaystyle x_{\ell }}"></span>. Using the equation above from forward propagation for a later residual block <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle L&gt;\ell }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>L</mi> <mo>&gt;</mo> <mi>&#x2113;<!-- ℓ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle L&gt;\ell }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/55c2ccc0b11cc989eb580b34fcc306d18d4bcc5e" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:5.651ex; height:2.176ex;" alt="{\displaystyle L&gt;\ell }"></span>:<sup id="cite_ref-resnetv2_6-1" class="reference"><a href="#cite_note-resnetv2-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}{\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}&amp;={\frac {\partial {\mathcal 
{E}}}{\partial x_{L}}}{\frac {\partial x_{L}}{\partial x_{\ell }}}\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}\left(1+{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell }^{L-1}F(x_{i})\right)\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}+{\frac {\partial {\mathcal {E}}}{\partial x_{L}}}{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell }^{L-1}F(x_{i})\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mtd> <mtd> <mi></mi> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <mrow 
class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> <mrow> <mo>(</mo> <mrow> <mn>1</mn> <mo>+</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> <munderover> <mo>&#x2211;<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mi>&#x2113;<!-- ℓ --></mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mrow> <mo>)</mo> </mrow> </mtd> </mtr> <mtr> <mtd /> <mtd> <mi></mi> <mo>=</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> <mo>+</mo> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> <mrow class="MJX-TeXAtom-ORD"> 
<mfrac> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> <munderover> <mo>&#x2211;<!-- ∑ --></mo> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> <mo>=</mo> <mi>&#x2113;<!-- ℓ --></mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </munderover> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}{\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}{\frac {\partial x_{L}}{\partial x_{\ell }}}\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}\left(1+{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell }^{L-1}F(x_{i})\right)\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}+{\frac {\partial {\mathcal {E}}}{\partial x_{L}}}{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell }^{L-1}F(x_{i})\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f2f8d63293b43f4715d382dd1ab800d7f6fc69e1" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -10.005ex; width:35.369ex; height:21.176ex;" alt="{\displaystyle {\begin{aligned}{\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}{\frac {\partial x_{L}}{\partial x_{\ell }}}\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}\left(1+{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell }^{L-1}F(x_{i})\right)\\&amp;={\frac {\partial {\mathcal {E}}}{\partial x_{L}}}+{\frac {\partial {\mathcal {E}}}{\partial x_{L}}}{\frac {\partial }{\partial x_{\ell }}}\sum _{i=\ell 
}^{L-1}F(x_{i})\end{aligned}}}"></span></dd></dl> <p>This formulation suggests that the gradient computation of a shallower layer, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75de64c2cb5928655ac6775b47fe44b930f3d138" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.671ex; width:3.429ex; height:4.176ex;" alt="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}"></span>, always has a later term <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi 
mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/28364496b63442451b27ef8651ca8130f4c5b79f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.671ex; width:3.781ex; height:4.176ex;" alt="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}"></span> that is directly added. Even if the gradients of the <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F(x_{i})}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F(x_{i})}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/54290e388c0dd2693499b3e01585c9755079c16c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:5.679ex; height:2.843ex;" alt="{\displaystyle F(x_{i})}"></span> terms are small, the total gradient <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow 
class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/75de64c2cb5928655ac6775b47fe44b930f3d138" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.671ex; width:3.429ex; height:4.176ex;" alt="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{\ell }}}}"></span> resists vanishing due to the added term <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="false" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mfrac> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mi class="MJX-tex-caligraphic" mathvariant="script">E</mi> </mrow> </mrow> </mrow> <mrow> <mi mathvariant="normal">&#x2202;<!-- ∂ --></mi> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>L</mi> </mrow> </msub> </mrow> </mfrac> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/28364496b63442451b27ef8651ca8130f4c5b79f" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.671ex; width:3.781ex; height:4.176ex;" 
alt="{\textstyle {\frac {\partial {\mathcal {E}}}{\partial x_{L}}}}"></span>. </p> <div class="mw-heading mw-heading2"><h2 id="Variants_of_residual_blocks">Variants of residual blocks</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=7" title="Edit section: Variants of residual blocks"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:ResBlockVariants.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/8/86/ResBlockVariants.png/220px-ResBlockVariants.png" decoding="async" width="220" height="79" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/86/ResBlockVariants.png/330px-ResBlockVariants.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/86/ResBlockVariants.png/440px-ResBlockVariants.png 2x" data-file-width="3081" data-file-height="1102" /></a><figcaption>Two variants of convolutional Residual Blocks.<sup id="cite_ref-resnet_1-2" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> <b>Left</b>: a <i>basic block</i> that has two 3x3 convolutional layers. 
<b>Right</b>: a <i>bottleneck block</i> that has a 1x1 convolutional layer for dimension reduction, a 3x3 convolutional layer, and another 1x1 convolutional layer for dimension restoration.</figcaption></figure> <div class="mw-heading mw-heading3"><h3 id="Basic_block">Basic block</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=8" title="Edit section: Basic block"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>A <i>basic block</i> is the simplest building block studied in the original ResNet.<sup id="cite_ref-resnet_1-3" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> This block consists of two sequential 3x3 <a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">convolutional</a> layers and a residual connection. The input and output dimensions of both layers are equal. </p> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:ResNet_block.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/b/b3/ResNet_block.svg/220px-ResNet_block.svg.png" decoding="async" width="220" height="139" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/b3/ResNet_block.svg/330px-ResNet_block.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b3/ResNet_block.svg/440px-ResNet_block.svg.png 2x" data-file-width="545" data-file-height="345" /></a><figcaption>Block diagram of ResNet (2015). It shows a ResNet block with and without the 1x1 convolution. 
The 1x1 convolution (with stride) can be used to change the shape of the array, which is necessary for residual connection through an upsampling/downsampling layer.</figcaption></figure> <div class="mw-heading mw-heading3"><h3 id="Bottleneck_block">Bottleneck block</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=9" title="Edit section: Bottleneck block"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>A <i>bottleneck block</i><sup id="cite_ref-resnet_1-4" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> consists of three sequential convolutional layers and a residual connection. The first layer in this block is a 1x1 convolution for dimension reduction (e.g., to 1/2 of the input dimension); the second layer performs a 3x3 convolution; the last layer is another 1x1 convolution for dimension restoration. 
The models of ResNet-50, ResNet-101, and ResNet-152 are all based on bottleneck blocks.<sup id="cite_ref-resnet_1-5" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Pre-activation_block">Pre-activation block</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=10" title="Edit section: Pre-activation block"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The <i>pre-activation residual block</i><sup id="cite_ref-resnetv2_6-2" class="reference"><a href="#cite_note-resnetv2-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> applies activation functions before applying the residual function <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle F}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>F</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle F}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/545fd099af8541605f7ee55f08225526be88ce57" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.741ex; height:2.176ex;" alt="{\displaystyle F}"></span>. 
Formally, the computation of a pre-activation residual block can be written as: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{\ell +1}=F(\phi (x_{\ell }))+x_{\ell }}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <mi>&#x03D5;<!-- ϕ --></mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> <mo stretchy="false">)</mo> <mo stretchy="false">)</mo> <mo>+</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{\ell +1}=F(\phi (x_{\ell }))+x_{\ell }}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/bc1bb6e183eaac49cde948630067c739fe28d254" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:21.527ex; height:2.843ex;" alt="{\displaystyle x_{\ell +1}=F(\phi (x_{\ell }))+x_{\ell }}"></span></dd></dl> <p>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \phi }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>&#x03D5;<!-- ϕ --></mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \phi }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/72b1f30316670aee6270a28334bdf4f5072cdde4" class="mwe-math-fallback-image-inline mw-invert skin-invert" 
aria-hidden="true" style="vertical-align: -0.671ex; width:1.385ex; height:2.509ex;" alt="{\displaystyle \phi }"></span> can be any activation (e.g. <a href="/wiki/ReLU" class="mw-redirect" title="ReLU">ReLU</a>) or normalization (e.g. <a href="/wiki/LayerNorm" class="mw-redirect" title="LayerNorm">LayerNorm</a>) operation. This design reduces the number of non-identity mappings between residual blocks. This design was used to train models with 200 to over 1000 layers.<sup id="cite_ref-resnetv2_6-3" class="reference"><a href="#cite_note-resnetv2-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> </p><p>Since <a href="/wiki/GPT-2" title="GPT-2">GPT-2</a>, <a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">transformer</a> blocks have been mostly implemented as pre-activation blocks. This is often referred to as "pre-normalization" in the literature of transformer models.<sup id="cite_ref-gpt2paper_7-0" class="reference"><a href="#cite_note-gpt2paper-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup> </p> <figure typeof="mw:File/Thumb"><a href="/wiki/File:Resnet-18_architecture.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Resnet-18_architecture.svg/176px-Resnet-18_architecture.svg.png" decoding="async" width="176" height="614" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Resnet-18_architecture.svg/264px-Resnet-18_architecture.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Resnet-18_architecture.svg/352px-Resnet-18_architecture.svg.png 2x" data-file-width="201" data-file-height="701" /></a><figcaption>The original Resnet-18 architecture. 
Up to 152 layers were trained in the original publication (as "ResNet-152").<sup id="cite_ref-:1_8-0" class="reference"><a href="#cite_note-:1-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup></figcaption></figure> <div class="mw-heading mw-heading2"><h2 id="Applications">Applications</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=11" title="Edit section: Applications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div><p> Originally, ResNet was designed for <a href="/wiki/Computer_vision" title="Computer vision">computer vision</a>.<sup id="cite_ref-resnet_1-6" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-:1_8-1" class="reference"><a href="#cite_note-:1-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-inceptionv42_9-0" class="reference"><a href="#cite_note-inceptionv42-9"><span class="cite-bracket">&#91;</span>9<span class="cite-bracket">&#93;</span></a></sup></p><figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Transformer,_full_architecture.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/3/34/Transformer%2C_full_architecture.png/220px-Transformer%2C_full_architecture.png" decoding="async" width="220" height="231" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/34/Transformer%2C_full_architecture.png/330px-Transformer%2C_full_architecture.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/34/Transformer%2C_full_architecture.png/440px-Transformer%2C_full_architecture.png 2x" data-file-width="1426" data-file-height="1500" /></a><figcaption>The Transformer architecture includes residual 
connections.</figcaption></figure> <p>All transformer architectures include residual connections. Indeed, very deep transformers cannot be trained without them.<sup id="cite_ref-lose_rank_10-0" class="reference"><a href="#cite_note-lose_rank-10"><span class="cite-bracket">&#91;</span>10<span class="cite-bracket">&#93;</span></a></sup> </p><p>The original ResNet paper made no claim on being inspired by biological systems. However, later research has related ResNet to biologically-plausible algorithms.<sup id="cite_ref-liao2016_11-0" class="reference"><a href="#cite_note-liao2016-11"><span class="cite-bracket">&#91;</span>11<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-xiao2018_12-0" class="reference"><a href="#cite_note-xiao2018-12"><span class="cite-bracket">&#91;</span>12<span class="cite-bracket">&#93;</span></a></sup> </p><p>A study published in <a href="/wiki/Science_(journal)" title="Science (journal)"><i>Science</i></a> in 2023<sup id="cite_ref-Winding2023_13-0" class="reference"><a href="#cite_note-Winding2023-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup> disclosed the complete <a href="/wiki/Connectome" title="Connectome">connectome</a> of an insect brain (specifically that of a fruit fly larva). This study discovered "multilayer shortcuts" that resemble the skip connections in artificial neural networks, including ResNets. 
</p> <div class="mw-heading mw-heading2"><h2 id="History">History</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=12" title="Edit section: History"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading3"><h3 id="Previous_work">Previous work</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=13" title="Edit section: Previous work"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Residual connections were noticed in <a href="/wiki/Neuroanatomy" title="Neuroanatomy">neuroanatomy</a>, such as <a href="/wiki/Rafael_Lorente_de_N%C3%B3" title="Rafael Lorente de Nó">Lorente de Nó</a> (1938).<sup id="cite_ref-14" class="reference"><a href="#cite_note-14"><span class="cite-bracket">&#91;</span>14<span class="cite-bracket">&#93;</span></a></sup><sup class="reference nowrap"><span title="Location: Fig 3">&#58;&#8202;Fig 3&#8202;</span></sup> <a href="/wiki/Warren_Sturgis_McCulloch" title="Warren Sturgis McCulloch">McCulloch</a> and <a href="/wiki/Walter_Pitts" title="Walter Pitts">Pitts</a> (1943) proposed artificial neural networks and considered those with residual connections.<sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span class="cite-bracket">&#91;</span>15<span class="cite-bracket">&#93;</span></a></sup><sup class="reference nowrap"><span title="Location: Fig 1.h">&#58;&#8202;Fig 1.h&#8202;</span></sup> </p><p>In 1961, <a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a> described a three-layer <a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">multilayer perceptron</a> (MLP) model with skip connections.<sup id="cite_ref-mlpbook_16-0" class="reference"><a href="#cite_note-mlpbook-16"><span 
class="cite-bracket">&#91;</span>16<span class="cite-bracket">&#93;</span></a></sup><sup class="reference nowrap"><span title="Page: 313, Chapter 15">&#58;&#8202;313,&#8202;Chapter 15&#8202;</span></sup> The model was referred to as a "cross-coupled system", and the skip connections were forms of cross-coupled connections. </p><p> During the late 1980s, "skip-layer" connections were sometimes used in neural networks. Examples include:<sup id="cite_ref-17" class="reference"><a href="#cite_note-17"><span class="cite-bracket">&#91;</span>17<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-massbook_18-0" class="reference"><a href="#cite_note-massbook-18"><span class="cite-bracket">&#91;</span>18<span class="cite-bracket">&#93;</span></a></sup> Lang and Witbrock (1988)<sup id="cite_ref-skip1988_19-0" class="reference"><a href="#cite_note-skip1988-19"><span class="cite-bracket">&#91;</span>19<span class="cite-bracket">&#93;</span></a></sup> trained a fully connected feedforward network where each layer skip-connects to all subsequent layers, like the later DenseNet (2016). 
In this work, the residual connection was of the form <span class="avoidwrap" style="display:inline-block;"><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x\mapsto F(x)+P(x)}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>x</mi> <mo stretchy="false">&#x21A6;<!-- ↦ --></mo> <mi>F</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>+</mo> <mi>P</mi> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x\mapsto F(x)+P(x)}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8c3dfe9d901cf5d246a3409679805d9a49108643" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:17.548ex; height:2.843ex;" alt="{\displaystyle x\mapsto F(x)+P(x)}"></span>,</span> where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle P}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>P</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle P}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b4dc73bf40314945ff376bd363916a738548d40a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.745ex; height:2.176ex;" alt="{\displaystyle P}"></span> is a randomly-initialized projection connection. 
They termed it a "short-cut connection".</p><figure class="mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:LSTM_3.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/f/fd/LSTM_3.svg/300px-LSTM_3.svg.png" decoding="async" width="300" height="155" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/f/fd/LSTM_3.svg/450px-LSTM_3.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/f/fd/LSTM_3.svg/600px-LSTM_3.svg.png 2x" data-file-width="552" data-file-height="285" /></a><figcaption>The long short-term memory (LSTM) cell can process data sequentially and keep its hidden state through time. The cell state <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/93578e37f3234419a34df79845836bc0ec5ef76c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.833ex; height:2.009ex;" alt="{\displaystyle c_{t}}"></span> can function as a generalized residual connection.</figcaption></figure> <div class="mw-heading mw-heading3"><h3 id="Degradation_problem">Degradation problem</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=14" title="Edit section: Degradation problem"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a> discovered the <a 
href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a> in 1991<sup id="cite_ref-hochreiter1991_20-0" class="reference"><a href="#cite_note-hochreiter1991-20"><span class="cite-bracket">&#91;</span>20<span class="cite-bracket">&#93;</span></a></sup> and argued that it explained why the then-prevalent forms of <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">recurrent neural networks</a> did not work for long sequences. He and <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Schmidhuber</a> later designed the LSTM architecture to solve this problem,<sup id="cite_ref-lstm1997_4-1" class="reference"><a href="#cite_note-lstm1997-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-lstm2000_21-0" class="reference"><a href="#cite_note-lstm2000-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup> which has a "cell state" <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/93578e37f3234419a34df79845836bc0ec5ef76c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.833ex; height:2.009ex;" alt="{\displaystyle c_{t}}"></span> that can function as a generalized residual connection. 
The <a href="/wiki/Highway_network" title="Highway network">highway network</a> (2015)<sup id="cite_ref-highway2015may_22-0" class="reference"><a href="#cite_note-highway2015may-22"><span class="cite-bracket">&#91;</span>22<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-highway2015july_23-0" class="reference"><a href="#cite_note-highway2015july-23"><span class="cite-bracket">&#91;</span>23<span class="cite-bracket">&#93;</span></a></sup> applied the idea of an LSTM <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">unfolded in time</a> to <a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">feedforward neural networks</a>, resulting in the highway network. ResNet is equivalent to an open-gated highway network. </p> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Recurrent_neural_network_unfold.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Recurrent_neural_network_unfold.svg/220px-Recurrent_neural_network_unfold.svg.png" decoding="async" width="220" height="73" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Recurrent_neural_network_unfold.svg/330px-Recurrent_neural_network_unfold.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Recurrent_neural_network_unfold.svg/440px-Recurrent_neural_network_unfold.svg.png 2x" data-file-width="2126" data-file-height="709" /></a><figcaption>Standard (left) and unfolded (right) basic recurrent neural network</figcaption></figure> <p>During the early days of deep learning, there were attempts to train increasingly deep models. 
Notable examples included the <a href="/wiki/AlexNet" title="AlexNet">AlexNet</a> (2012), which had 8 layers, and the <a href="/wiki/VGGNet" title="VGGNet">VGG-19</a> (2014), which had 19 layers.<sup id="cite_ref-simonyan2015very_24-0" class="reference"><a href="#cite_note-simonyan2015very-24"><span class="cite-bracket">&#91;</span>24<span class="cite-bracket">&#93;</span></a></sup> However, stacking too many layers led to a steep reduction in <a href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">training</a> accuracy,<sup id="cite_ref-prelu_25-0" class="reference"><a href="#cite_note-prelu-25"><span class="cite-bracket">&#91;</span>25<span class="cite-bracket">&#93;</span></a></sup> known as the "degradation" problem.<sup id="cite_ref-resnet_1-7" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> In theory, adding additional layers to deepen a network should not result in a higher training <a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">loss</a>, but this is what happened with <a href="/wiki/VGGNet" title="VGGNet">VGGNet</a>.<sup id="cite_ref-resnet_1-8" class="reference"><a href="#cite_note-resnet-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> If the extra layers can be set as <a href="/wiki/Identity_mapping" class="mw-redirect" title="Identity mapping">identity mappings</a>, however, then the deeper network would represent the same function as its shallower counterpart. 
There is some evidence that the optimizer is not able to approach identity mappings for the parameterized layers, and the benefit of residual connections was to allow identity mappings by default.<sup id="cite_ref-resnetv2_6-4" class="reference"><a href="#cite_note-resnetv2-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> </p><p>In 2014, the state of the art was training deep neural networks with 20 to 30 layers.<sup id="cite_ref-simonyan2015very_24-1" class="reference"><a href="#cite_note-simonyan2015very-24"><span class="cite-bracket">&#91;</span>24<span class="cite-bracket">&#93;</span></a></sup> The research team for ResNet attempted to train deeper ones by empirically testing various methods for training deeper networks, until they came upon the ResNet architecture.<sup id="cite_ref-26" class="reference"><a href="#cite_note-26"><span class="cite-bracket">&#91;</span>26<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Subsequent_work">Subsequent work</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=15" title="Edit section: Subsequent work"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>DenseNet</b> (2016)<sup id="cite_ref-27" class="reference"><a href="#cite_note-27"><span class="cite-bracket">&#91;</span>27<span class="cite-bracket">&#93;</span></a></sup> connects the output of each layer to the input to each subsequent layer: </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{\ell +1}=F(x_{1},x_{2},\dots ,x_{\ell -1},x_{\ell })}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> 
<mi>&#x2113;<!-- ℓ --></mi> <mo>+</mo> <mn>1</mn> </mrow> </msub> <mo>=</mo> <mi>F</mi> <mo stretchy="false">(</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>2</mn> </mrow> </msub> <mo>,</mo> <mo>&#x2026;<!-- … --></mo> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>,</mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>&#x2113;<!-- ℓ --></mi> </mrow> </msub> <mo stretchy="false">)</mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{\ell +1}=F(x_{1},x_{2},\dots ,x_{\ell -1},x_{\ell })}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/c70160e717bcf4ff486a0011b4bde13f37835329" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:29.606ex; height:2.843ex;" alt="{\displaystyle x_{\ell +1}=F(x_{1},x_{2},\dots ,x_{\ell -1},x_{\ell })}"></span></dd></dl> <p><b>Stochastic depth</b><sup id="cite_ref-28" class="reference"><a href="#cite_note-28"><span class="cite-bracket">&#91;</span>28<span class="cite-bracket">&#93;</span></a></sup> is a <a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">regularization</a> method that randomly drops a subset of layers and lets the signal propagate through the identity skip connections. 
Also known as <i>DropPath</i>, this regularizes training for deep models, such as <a href="/wiki/Vision_transformer" title="Vision transformer">vision transformers</a>.<sup id="cite_ref-29" class="reference"><a href="#cite_note-29"><span class="cite-bracket">&#91;</span>29<span class="cite-bracket">&#93;</span></a></sup></p><figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:ResNext_block.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c3/ResNext_block.svg/220px-ResNext_block.svg.png" decoding="async" width="220" height="133" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c3/ResNext_block.svg/330px-ResNext_block.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c3/ResNext_block.svg/440px-ResNext_block.svg.png 2x" data-file-width="641" data-file-height="387" /></a><figcaption>ResNeXt block diagram</figcaption></figure> <p><b>ResNeXt</b> (2017) combines the <a href="/wiki/Inception_(deep_learning_architecture)" title="Inception (deep learning architecture)">Inception module</a> with ResNet.<sup id="cite_ref-30" class="reference"><a href="#cite_note-30"><span class="cite-bracket">&#91;</span>30<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-:1_8-2" class="reference"><a href="#cite_note-:1-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>Squeeze-and-Excitation Networks</b> (2018) added squeeze-and-excitation (SE) modules to ResNet.<sup id="cite_ref-31" class="reference"><a href="#cite_note-31"><span class="cite-bracket">&#91;</span>31<span class="cite-bracket">&#93;</span></a></sup> An SE module is applied after a convolution, and takes a tensor of shape <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \mathbb {R} ^{H\times W\times C}}"> <semantics> 
<mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>H</mi> <mo>&#x00D7;<!-- × --></mo> <mi>W</mi> <mo>&#x00D7;<!-- × --></mo> <mi>C</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \mathbb {R} ^{H\times W\times C}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/017d01e20d272f34402165cca04b85e2b6cb1a62" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:8.898ex; height:2.676ex;" alt="{\displaystyle \mathbb {R} ^{H\times W\times C}}"></span> (height, width, channels) as input. Each channel is averaged, resulting in a vector of shape <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \mathbb {R} ^{C}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>C</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \mathbb {R} ^{C}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/81aec4687bbf2159e80b972672af27bd5dd0feac" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:3.159ex; height:2.676ex;" alt="{\displaystyle \mathbb {R} ^{C}}"></span>. This is then passed through a <a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">multilayer perceptron</a> (with an architecture such as <i>linear-ReLU-linear-sigmoid</i>) before it is multiplied with the original tensor. 
</p> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Residual_neural_network&amp;action=edit&amp;section=16" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-resnet-1"><span class="mw-cite-backlink">^ <a href="#cite_ref-resnet_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-resnet_1-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-resnet_1-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-resnet_1-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-resnet_1-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-resnet_1-5"><sup><i><b>f</b></i></sup></a> <a href="#cite_ref-resnet_1-6"><sup><i><b>g</b></i></sup></a> <a href="#cite_ref-resnet_1-7"><sup><i><b>h</b></i></sup></a> <a href="#cite_ref-resnet_1-8"><sup><i><b>i</b></i></sup></a></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon 
a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFHeZhangRenSun2016" class="citation conference cs1"><a href="/wiki/Kaiming_He" title="Kaiming He">He, Kaiming</a>; Zhang, Xiangyu; Ren, Shaoqing; Sun, Jian (2016). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf"><i>Deep Residual Learning for Image Recognition</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1512.03385">1512.03385</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2016.90">10.1109/CVPR.2016.90</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Deep+Residual+Learning+for+Image+Recognition&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1512.03385&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2016.90&amp;rft.aulast=He&amp;rft.aufirst=Kaiming&amp;rft.au=Zhang%2C+Xiangyu&amp;rft.au=Ren%2C+Shaoqing&amp;rft.au=Sun%2C+Jian&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent_cvpr_2016%2Fpapers%2FHe_Deep_Residual_Learning_CVPR_2016_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-ilsvrc2015-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-ilsvrc2015_2-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://image-net.org/challenges/LSVRC/2015/results.php">"ILSVRC2015 Results"</a>. 
<i>image-net.org</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=image-net.org&amp;rft.atitle=ILSVRC2015+Results&amp;rft_id=https%3A%2F%2Fimage-net.org%2Fchallenges%2FLSVRC%2F2015%2Fresults.php&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-imagenet-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-imagenet_3-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDengDongSocherLi2009" class="citation conference cs1">Deng, Jia; Dong, Wei; Socher, Richard; Li, Li-Jia; Li, Kai; <a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Li, Fei-Fei</a> (2009). <i>ImageNet: A large-scale hierarchical image database</i>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2009.5206848">10.1109/CVPR.2009.5206848</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=ImageNet%3A+A+large-scale+hierarchical+image+database&amp;rft.date=2009&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2009.5206848&amp;rft.aulast=Deng&amp;rft.aufirst=Jia&amp;rft.au=Dong%2C+Wei&amp;rft.au=Socher%2C+Richard&amp;rft.au=Li%2C+Li-Jia&amp;rft.au=Li%2C+Kai&amp;rft.au=Li%2C+Fei-Fei&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-lstm1997-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-lstm1997_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-lstm1997_4-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSepp_HochreiterJürgen_Schmidhuber1997" class="citation journal cs1"><a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a>; <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a> (1997). <a rel="nofollow" class="external text" href="https://www.researchgate.net/publication/13853244">"Long short-term memory"</a>. <i><a href="/wiki/Neural_Computation_(journal)" title="Neural Computation (journal)">Neural Computation</a></i>. <b>9</b> (8): <span class="nowrap">1735–</span>1780. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1162%2Fneco.1997.9.8.1735">10.1162/neco.1997.9.8.1735</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/9377276">9377276</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:1915014">1915014</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Computation&amp;rft.atitle=Long+short-term+memory&amp;rft.volume=9&amp;rft.issue=8&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1735-%3C%2Fspan%3E1780&amp;rft.date=1997&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A1915014%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F9377276&amp;rft_id=info%3Adoi%2F10.1162%2Fneco.1997.9.8.1735&amp;rft.au=Sepp+Hochreiter&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rft_id=https%3A%2F%2Fwww.researchgate.net%2Fpublication%2F13853244&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHaninRolnick2018" class="citation conference cs1">Hanin, Boris; Rolnick, David (2018). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper/2018/hash/d81f9c1be2e08964bf9f24b15f0e4900-Paper.pdf"><i>How to Start Training: The Effect of Initialization and Architecture</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural Information Processing Systems">Conference on Neural Information Processing Systems</a>. Vol.&#160;31. Curran Associates, Inc. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1803.01719">1803.01719</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=How+to+Start+Training%3A+The+Effect+of+Initialization+and+Architecture&amp;rft.pub=Curran+Associates%2C+Inc.&amp;rft.date=2018&amp;rft_id=info%3Aarxiv%2F1803.01719&amp;rft.aulast=Hanin&amp;rft.aufirst=Boris&amp;rft.au=Rolnick%2C+David&amp;rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper%2F2018%2Fhash%2Fd81f9c1be2e08964bf9f24b15f0e4900-Paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-resnetv2-6"><span class="mw-cite-backlink">^ <a href="#cite_ref-resnetv2_6-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-resnetv2_6-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-resnetv2_6-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-resnetv2_6-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-resnetv2_6-4"><sup><i><b>e</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHeZhangRenSun2016" class="citation conference cs1"><a href="/wiki/Kaiming_He" title="Kaiming He">He, Kaiming</a>; Zhang, Xiangyu; Ren, Shaoqing; Sun, Jian (2016). <a rel="nofollow" class="external text" href="https://link.springer.com/content/pdf/10.1007/978-3-319-46493-0_38.pdf"><i>Identity Mappings in Deep Residual Networks</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/European_Conference_on_Computer_Vision" title="European Conference on Computer Vision">European Conference on Computer Vision</a>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1603.05027">1603.05027</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-319-46493-0_38">10.1007/978-3-319-46493-0_38</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Identity+Mappings+in+Deep+Residual+Networks&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1603.05027&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-319-46493-0_38&amp;rft.aulast=He&amp;rft.aufirst=Kaiming&amp;rft.au=Zhang%2C+Xiangyu&amp;rft.au=Ren%2C+Shaoqing&amp;rft.au=Sun%2C+Jian&amp;rft_id=https%3A%2F%2Flink.springer.com%2Fcontent%2Fpdf%2F10.1007%2F978-3-319-46493-0_38.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-gpt2paper-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-gpt2paper_7-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRadfordWuChildLuan2019" class="citation web cs1">Radford, Alec; Wu, Jeffrey; Child, Rewon; Luan, David; Amodei, Dario; Sutskever, Ilya (14 February 2019). <a rel="nofollow" class="external text" href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">"Language models are unsupervised multitask learners"</a> <span class="cs1-format">(PDF)</span>. 
<a rel="nofollow" class="external text" href="https://web.archive.org/web/20210206183945/https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">Archived</a> <span class="cs1-format">(PDF)</span> from the original on 6 February 2021<span class="reference-accessdate">. Retrieved <span class="nowrap">19 December</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Language+models+are+unsupervised+multitask+learners&amp;rft.date=2019-02-14&amp;rft.aulast=Radford&amp;rft.aufirst=Alec&amp;rft.au=Wu%2C+Jeffrey&amp;rft.au=Child%2C+Rewon&amp;rft.au=Luan%2C+David&amp;rft.au=Amodei%2C+Dario&amp;rft.au=Sutskever%2C+Ilya&amp;rft_id=https%3A%2F%2Fcdn.openai.com%2Fbetter-language-models%2Flanguage_models_are_unsupervised_multitask_learners.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-:1-8"><span class="mw-cite-backlink">^ <a href="#cite_ref-:1_8-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:1_8-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-:1_8-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFZhangLiptonLiSmola2024" class="citation book cs1">Zhang, Aston; Lipton, Zachary; Li, Mu; Smola, Alexander J. (2024). <a rel="nofollow" class="external text" href="https://d2l.ai/chapter_convolutional-modern/resnet.html">"8.6. Residual Networks (ResNet) and ResNeXt"</a>. <i>Dive into deep learning</i>. Cambridge New York Port Melbourne New Delhi Singapore: Cambridge University Press. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-009-38943-3" title="Special:BookSources/978-1-009-38943-3"><bdi>978-1-009-38943-3</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=8.6.+Residual+Networks+%28ResNet%29+and+ResNeXt&amp;rft.btitle=Dive+into+deep+learning&amp;rft.place=Cambridge+New+York+Port+Melbourne+New+Delhi+Singapore&amp;rft.pub=Cambridge+University+Press&amp;rft.date=2024&amp;rft.isbn=978-1-009-38943-3&amp;rft.aulast=Zhang&amp;rft.aufirst=Aston&amp;rft.au=Lipton%2C+Zachary&amp;rft.au=Li%2C+Mu&amp;rft.au=Smola%2C+Alexander+J.&amp;rft_id=https%3A%2F%2Fd2l.ai%2Fchapter_convolutional-modern%2Fresnet.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-inceptionv42-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-inceptionv42_9-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSzegedyIoffeVanhouckeAlemi2017" class="citation conference cs1">Szegedy, Christian; Ioffe, Sergey; Vanhoucke, Vincent; Alemi, Alex (2017). <a rel="nofollow" class="external text" href="https://cdn.aaai.org/ojs/11231/11231-13-14759-1-2-20201228.pdf"><i>Inception-v4, Inception-ResNet and the impact of residual connections on learning</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/AAAI_Conference_on_Artificial_Intelligence" title="AAAI Conference on Artificial Intelligence">AAAI Conference on Artificial Intelligence</a>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1602.07261">1602.07261</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1609%2Faaai.v31i1.11231">10.1609/aaai.v31i1.11231</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Inception-v4%2C+Inception-ResNet+and+the+impact+of+residual+connections+on+learning&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1602.07261&amp;rft_id=info%3Adoi%2F10.1609%2Faaai.v31i1.11231&amp;rft.aulast=Szegedy&amp;rft.aufirst=Christian&amp;rft.au=Ioffe%2C+Sergey&amp;rft.au=Vanhoucke%2C+Vincent&amp;rft.au=Alemi%2C+Alex&amp;rft_id=https%3A%2F%2Fcdn.aaai.org%2Fojs%2F11231%2F11231-13-14759-1-2-20201228.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-lose_rank-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-lose_rank_10-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDongCordonnierLoukas2021" class="citation conference cs1">Dong, Yihe; Cordonnier, Jean-Baptiste; Loukas, Andreas (2021). <a rel="nofollow" class="external text" href="http://proceedings.mlr.press/v139/dong21a/dong21a.pdf"><i>Attention is not all you need: pure attention loses rank doubly exponentially with depth</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/International_Conference_on_Machine_Learning" title="International Conference on Machine Learning">International Conference on Machine Learning</a>. PMLR. pp.&#160;<span class="nowrap">2793–</span>2803. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2103.03404">2103.03404</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Attention+is+not+all+you+need%3A+pure+attention+loses+rank+doubly+exponentially+with+depth&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E2793-%3C%2Fspan%3E2803&amp;rft.pub=PMLR&amp;rft.date=2021&amp;rft_id=info%3Aarxiv%2F2103.03404&amp;rft.aulast=Dong&amp;rft.aufirst=Yihe&amp;rft.au=Cordonnier%2C+Jean-Baptiste&amp;rft.au=Loukas%2C+Andreas&amp;rft_id=http%3A%2F%2Fproceedings.mlr.press%2Fv139%2Fdong21a%2Fdong21a.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-liao2016-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-liao2016_11-0">^</a></b></span> <span class="reference-text"> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLiaoPoggio2016" class="citation arxiv cs1">Liao, Qianli; Poggio, Tomaso (2016). "Bridging the Gaps Between Residual Learning, Recurrent Neural Networks and Visual Cortex". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1604.03640">1604.03640</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Bridging+the+Gaps+Between+Residual+Learning%2C+Recurrent+Neural+Networks+and+Visual+Cortex&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1604.03640&amp;rft.aulast=Liao&amp;rft.aufirst=Qianli&amp;rft.au=Poggio%2C+Tomaso&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-xiao2018-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-xiao2018_12-0">^</a></b></span> <span class="reference-text"> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFXiaoChenLiaoPoggio2019" class="citation conference cs1">Xiao, Will; Chen, Honglin; Liao, Qianli; Poggio, Tomaso (2019). <i>Biologically-Plausible Learning Algorithms Can Scale to Large Datasets</i>. <a href="/wiki/International_Conference_on_Learning_Representations" title="International Conference on Learning Representations">International Conference on Learning Representations</a>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1811.03567">1811.03567</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Biologically-Plausible+Learning+Algorithms+Can+Scale+to+Large+Datasets&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1811.03567&amp;rft.aulast=Xiao&amp;rft.aufirst=Will&amp;rft.au=Chen%2C+Honglin&amp;rft.au=Liao%2C+Qianli&amp;rft.au=Poggio%2C+Tomaso&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-Winding2023-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-Winding2023_13-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFWindingPedigoBarnesPatsolic2023" class="citation journal cs1">Winding, Michael; Pedigo, Benjamin; Barnes, Christopher; Patsolic, Heather; Park, Youngser; Kazimiers, Tom; Fushiki, Akira; Andrade, Ingrid; Khandelwal, Avinash; Valdes-Aleman, Javier; Li, Feng; Randel, Nadine; Barsotti, Elizabeth; Correia, Ana; Fetter, Fetter; Hartenstein, Volker; Priebe, Carey; Vogelstein, Joshua; Cardona, Albert; Zlatic, Marta (10 Mar 2023). <a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7614541">"The connectome of an insect brain"</a>. <i>Science</i>. <b>379</b> (6636): eadd9330. <a href="/wiki/BioRxiv_(identifier)" class="mw-redirect" title="BioRxiv (identifier)">bioRxiv</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1101%2F2022.11.28.516756v1">10.1101/2022.11.28.516756v1</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1126%2Fscience.add9330">10.1126/science.add9330</a>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7614541">7614541</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/36893230">36893230</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:254070919">254070919</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Science&amp;rft.atitle=The+connectome+of+an+insect+brain&amp;rft.volume=379&amp;rft.issue=6636&amp;rft.pages=eadd9330&amp;rft.date=2023-03-10&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC7614541%23id-name%3DPMC&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A254070919%23id-name%3DS2CID&amp;rft_id=https%3A%2F%2Fdoi.org%2F10.1101%2F2022.11.28.516756v1%23id-name%3DbioRxiv&amp;rft_id=info%3Apmid%2F36893230&amp;rft_id=info%3Adoi%2F10.1126%2Fscience.add9330&amp;rft.aulast=Winding&amp;rft.aufirst=Michael&amp;rft.au=Pedigo%2C+Benjamin&amp;rft.au=Barnes%2C+Christopher&amp;rft.au=Patsolic%2C+Heather&amp;rft.au=Park%2C+Youngser&amp;rft.au=Kazimiers%2C+Tom&amp;rft.au=Fushiki%2C+Akira&amp;rft.au=Andrade%2C+Ingrid&amp;rft.au=Khandelwal%2C+Avinash&amp;rft.au=Valdes-Aleman%2C+Javier&amp;rft.au=Li%2C+Feng&amp;rft.au=Randel%2C+Nadine&amp;rft.au=Barsotti%2C+Elizabeth&amp;rft.au=Correia%2C+Ana&amp;rft.au=Fetter%2C+Fetter&amp;rft.au=Hartenstein%2C+Volker&amp;rft.au
=Priebe%2C+Carey&amp;rft.au=Vogelstein%2C+Joshua&amp;rft.au=Cardona%2C+Albert&amp;rft.au=Zlatic%2C+Marta&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC7614541&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFDe_N1938" class="citation journal cs1">De Nó, Rafael Lorente (1938-05-01). <a rel="nofollow" class="external text" href="https://www.physiology.org/doi/10.1152/jn.1938.1.3.207">"Analysis of the Activity of the Chains of Internuncial Neurons"</a>. <i>Journal of Neurophysiology</i>. <b>1</b> (3): <span class="nowrap">207–</span>244. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1152%2Fjn.1938.1.3.207">10.1152/jn.1938.1.3.207</a>. 
<a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0022-3077">0022-3077</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Journal+of+Neurophysiology&amp;rft.atitle=Analysis+of+the+Activity+of+the+Chains+of+Internuncial+Neurons&amp;rft.volume=1&amp;rft.issue=3&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E207-%3C%2Fspan%3E244&amp;rft.date=1938-05-01&amp;rft_id=info%3Adoi%2F10.1152%2Fjn.1938.1.3.207&amp;rft.issn=0022-3077&amp;rft.aulast=De+N&amp;rft.aufirst=Rafael+Lorente&amp;rft_id=https%3A%2F%2Fwww.physiology.org%2Fdoi%2F10.1152%2Fjn.1938.1.3.207&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFMcCullochPitts1943" class="citation journal cs1">McCulloch, Warren S.; Pitts, Walter (1943-12-01). <a rel="nofollow" class="external text" href="https://link.springer.com/article/10.1007/BF02478259">"A logical calculus of the ideas immanent in nervous activity"</a>. <i>The Bulletin of Mathematical Biophysics</i>. <b>5</b> (4): <span class="nowrap">115–</span>133. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2FBF02478259">10.1007/BF02478259</a>. 
<a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1522-9602">1522-9602</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=The+Bulletin+of+Mathematical+Biophysics&amp;rft.atitle=A+logical+calculus+of+the+ideas+immanent+in+nervous+activity&amp;rft.volume=5&amp;rft.issue=4&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E115-%3C%2Fspan%3E133&amp;rft.date=1943-12-01&amp;rft_id=info%3Adoi%2F10.1007%2FBF02478259&amp;rft.issn=1522-9602&amp;rft.aulast=McCulloch&amp;rft.aufirst=Warren+S.&amp;rft.au=Pitts%2C+Walter&amp;rft_id=https%3A%2F%2Flink.springer.com%2Farticle%2F10.1007%2FBF02478259&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-mlpbook-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-mlpbook_16-0">^</a></b></span> <span class="reference-text"> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFRosenblatt1961" class="citation book cs1">Rosenblatt, Frank (1961). <a rel="nofollow" class="external text" href="https://safari.ethz.ch/digitaltechnik/spring2018/lib/exe/fetch.php?media=neurodynamics1962rosenblatt.pdf#page=327"><i>Principles of neurodynamics. 
perceptrons and the theory of brain mechanisms</i></a> <span class="cs1-format">(PDF)</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Principles+of+neurodynamics.+perceptrons+and+the+theory+of+brain+mechanisms&amp;rft.date=1961&amp;rft.aulast=Rosenblatt&amp;rft.aufirst=Frank&amp;rft_id=https%3A%2F%2Fsafari.ethz.ch%2Fdigitaltechnik%2Fspring2018%2Flib%2Fexe%2Ffetch.php%3Fmedia%3Dneurodynamics1962rosenblatt.pdf%23page%3D327&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text">Rumelhart, David E., Geoffrey E. Hinton, and Ronald J. Williams. "Learning internal representations by error propagation", <i>Parallel Distributed Processing</i>. Vol. 1. 1986.</span> </li> <li id="cite_note-massbook-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-massbook_18-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFVenablesRipley1994" class="citation book cs1">Venables, W. N.; Ripley, Brian D. (1994). <a rel="nofollow" class="external text" href="https://books.google.com/books?id=ayDvAAAAMAAJ"><i>Modern Applied Statistics with S-Plus</i></a>. Springer. pp.&#160;<span class="nowrap">261–</span>262. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/9783540943501" title="Special:BookSources/9783540943501"><bdi>9783540943501</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Modern+Applied+Statistics+with+S-Plus&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E261-%3C%2Fspan%3E262&amp;rft.pub=Springer&amp;rft.date=1994&amp;rft.isbn=9783540943501&amp;rft.aulast=Venables&amp;rft.aufirst=W.+N.&amp;rft.au=Ripley%2C+Brain+D.&amp;rft_id=https%3A%2F%2Fbooks.google.com%2Fbooks%3Fid%3DayDvAAAAMAAJ&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-skip1988-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-skip1988_19-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLangWitbrock1988" class="citation journal cs1">Lang, Kevin; Witbrock, Michael (1988). <a rel="nofollow" class="external text" href="https://gwern.net/doc/ai/nn/fully-connected/1988-lang.pdf">"Learning to tell two spirals apart"</a> <span class="cs1-format">(PDF)</span>. 
<i>Proceedings of the 1988 Connectionist Models Summer School</i>: <span class="nowrap">52–</span>59.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+1988+Connectionist+Models+Summer+School&amp;rft.atitle=Learning+to+tell+two+spirals+apart&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E52-%3C%2Fspan%3E59&amp;rft.date=1988&amp;rft.aulast=Lang&amp;rft.aufirst=Kevin&amp;rft.au=Witbrock%2C+Michael&amp;rft_id=https%3A%2F%2Fgwern.net%2Fdoc%2Fai%2Fnn%2Ffully-connected%2F1988-lang.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-hochreiter1991-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-hochreiter1991_20-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHochreiter1991" class="citation thesis cs1">Hochreiter, Sepp (1991). <a rel="nofollow" class="external text" href="http://www.bioinf.jku.at/publications/older/3804.pdf"><i>Untersuchungen zu dynamischen neuronalen Netzen</i></a> <span class="cs1-format">(PDF)</span> (diploma thesis). Technical University Munich, Institute of Computer Science, advisor: J. 
Schmidhuber.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adissertation&amp;rft.title=Untersuchungen+zu+dynamischen+neuronalen+Netzen&amp;rft.degree=diploma&amp;rft.inst=Technical+University+Munich%2C+Institute+of+Computer+Science%2C+advisor%3A+J.+Schmidhuber&amp;rft.date=1991&amp;rft.aulast=Hochreiter&amp;rft.aufirst=Sepp&amp;rft_id=http%3A%2F%2Fwww.bioinf.jku.at%2Fpublications%2Folder%2F3804.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-lstm2000-21"><span class="mw-cite-backlink"><b><a href="#cite_ref-lstm2000_21-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFFelix_A._GersJürgen_SchmidhuberFred_Cummins2000" class="citation journal cs1">Felix A. Gers; Jürgen Schmidhuber; Fred Cummins (2000). "Learning to Forget: Continual Prediction with LSTM". <i><a href="/wiki/Neural_Computation_(journal)" title="Neural Computation (journal)">Neural Computation</a></i>. <b>12</b> (10): <span class="nowrap">2451–</span>2471. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.5709">10.1.1.55.5709</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1162%2F089976600300015015">10.1162/089976600300015015</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/11032042">11032042</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:11598600">11598600</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Computation&amp;rft.atitle=Learning+to+Forget%3A+Continual+Prediction+with+LSTM&amp;rft.volume=12&amp;rft.issue=10&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E2451-%3C%2Fspan%3E2471&amp;rft.date=2000&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.55.5709%23id-name%3DCiteSeerX&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A11598600%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F11032042&amp;rft_id=info%3Adoi%2F10.1162%2F089976600300015015&amp;rft.au=Felix+A.+Gers&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rft.au=Fred+Cummins&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-highway2015may-22"><span class="mw-cite-backlink"><b><a href="#cite_ref-highway2015may_22-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSrivastavaGreffSchmidhuber2015" class="citation arxiv cs1">Srivastava, Rupesh Kumar; Greff, Klaus; Schmidhuber, Jürgen (3 May 2015). "Highway Networks". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1505.00387">1505.00387</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Highway+Networks&amp;rft.date=2015-05-03&amp;rft_id=info%3Aarxiv%2F1505.00387&amp;rft.aulast=Srivastava&amp;rft.aufirst=Rupesh+Kumar&amp;rft.au=Greff%2C+Klaus&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-highway2015july-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-highway2015july_23-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSrivastavaGreffSchmidhuber2015" class="citation conference cs1">Srivastava, Rupesh Kumar; Greff, Klaus; Schmidhuber, Jürgen (2015). <a rel="nofollow" class="external text" href="https://proceedings.neurips.cc/paper/2015/file/215a71a12769b056c3c32e7299f1c5ed-Paper.pdf"><i>Training Very Deep Networks</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural Information Processing Systems">Conference on Neural Information Processing Systems</a>. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1507.06228">1507.06228</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Training+Very+Deep+Networks&amp;rft.date=2015&amp;rft_id=info%3Aarxiv%2F1507.06228&amp;rft.aulast=Srivastava&amp;rft.aufirst=Rupesh+Kumar&amp;rft.au=Greff%2C+Klaus&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rft_id=https%3A%2F%2Fproceedings.neurips.cc%2Fpaper%2F2015%2Ffile%2F215a71a12769b056c3c32e7299f1c5ed-Paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-simonyan2015very-24"><span class="mw-cite-backlink">^ <a href="#cite_ref-simonyan2015very_24-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-simonyan2015very_24-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSimonyanZisserman2015" class="citation arxiv cs1">Simonyan, Karen; Zisserman, Andrew (2015-04-10). "Very Deep Convolutional Networks for Large-Scale Image Recognition". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1409.1556">1409.1556</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CV">cs.CV</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Very+Deep+Convolutional+Networks+for+Large-Scale+Image+Recognition&amp;rft.date=2015-04-10&amp;rft_id=info%3Aarxiv%2F1409.1556&amp;rft.aulast=Simonyan&amp;rft.aufirst=Karen&amp;rft.au=Zisserman%2C+Andrew&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-prelu-25"><span class="mw-cite-backlink"><b><a href="#cite_ref-prelu_25-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHeZhangRenSun2015" class="citation conference cs1"><a href="/wiki/Kaiming_He" title="Kaiming He">He, Kaiming</a>; Zhang, Xiangyu; Ren, Shaoqing; Sun, Jian (2015). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf"><i>Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/International_Conference_on_Computer_Vision" title="International Conference on Computer Vision">International Conference on Computer Vision</a>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1502.01852">1502.01852</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FICCV.2015.123">10.1109/ICCV.2015.123</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Delving+Deep+into+Rectifiers%3A+Surpassing+Human-Level+Performance+on+ImageNet+Classification&amp;rft.date=2015&amp;rft_id=info%3Aarxiv%2F1502.01852&amp;rft_id=info%3Adoi%2F10.1109%2FICCV.2015.123&amp;rft.aulast=He&amp;rft.aufirst=Kaiming&amp;rft.au=Zhang%2C+Xiangyu&amp;rft.au=Ren%2C+Shaoqing&amp;rft.au=Sun%2C+Jian&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent_iccv_2015%2Fpapers%2FHe_Delving_Deep_into_ICCV_2015_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-26"><span class="mw-cite-backlink"><b><a href="#cite_ref-26">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLinn2015" class="citation web cs1">Linn, Allison (2015-12-10). <a rel="nofollow" class="external text" href="https://blogs.microsoft.com/ai/microsoft-researchers-win-imagenet-computer-vision-challenge/">"Microsoft researchers win ImageNet computer vision challenge"</a>. <i>The AI Blog</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2024-06-29</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=The+AI+Blog&amp;rft.atitle=Microsoft+researchers+win+ImageNet+computer+vision+challenge&amp;rft.date=2015-12-10&amp;rft.aulast=Linn&amp;rft.aufirst=Allison&amp;rft_id=https%3A%2F%2Fblogs.microsoft.com%2Fai%2Fmicrosoft-researchers-win-imagenet-computer-vision-challenge%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-27">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHuangLiuvan_der_MaatenWeinberger2017" class="citation conference cs1">Huang, Gao; Liu, Zhuang; van der Maaten, Laurens; Weinberger, Kilian (2017). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content_cvpr_2017/papers/Huang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf"><i>Densely Connected Convolutional Networks</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1608.06993">1608.06993</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2017.243">10.1109/CVPR.2017.243</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Densely+Connected+Convolutional+Networks&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1608.06993&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2017.243&amp;rft.aulast=Huang&amp;rft.aufirst=Gao&amp;rft.au=Liu%2C+Zhuang&amp;rft.au=van+der+Maaten%2C+Laurens&amp;rft.au=Weinberger%2C+Kilian&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent_cvpr_2017%2Fpapers%2FHuang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-28"><span class="mw-cite-backlink"><b><a href="#cite_ref-28">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHuangSunLiuWeinberger2016" class="citation conference cs1">Huang, Gao; Sun, Yu; Liu, Zhuang; Weinberger, Kilian (2016). <a rel="nofollow" class="external text" href="https://link.springer.com/content/pdf/10.1007/978-3-319-46493-0_39.pdf"><i>Deep Networks with Stochastic Depth</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/European_Conference_on_Computer_Vision" title="European Conference on Computer Vision">European Conference on Computer Vision</a>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1603.09382">1603.09382</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-319-46493-0_39">10.1007/978-3-319-46493-0_39</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Deep+Networks+with+Stochastic+Depth&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1603.09382&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-319-46493-0_39&amp;rft.aulast=Huang&amp;rft.aufirst=Gao&amp;rft.au=Sun%2C+Yu&amp;rft.au=Liu%2C+Zhuang&amp;rft.au=Weinberger%2C+Kilian&amp;rft_id=https%3A%2F%2Flink.springer.com%2Fcontent%2Fpdf%2F10.1007%2F978-3-319-46493-0_39.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-29">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFLeeKimWilletteHwang2022" class="citation conference cs1">Lee, Youngwan; Kim, Jonghee; Willette, Jeffrey; Hwang, Sung Ju (2022). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content/CVPR2022/papers/Lee_MPViT_Multi-Path_Vision_Transformer_for_Dense_Prediction_CVPR_2022_paper.pdf"><i>MPViT: Multi-Path Vision Transformer for Dense Prediction</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. pp.&#160;<span class="nowrap">7287–</span>7296. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2112.11010">2112.11010</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR52688.2022.00714">10.1109/CVPR52688.2022.00714</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=MPViT%3A+Multi-Path+Vision+Transformer+for+Dense+Prediction&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E7287-%3C%2Fspan%3E7296&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2112.11010&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR52688.2022.00714&amp;rft.aulast=Lee&amp;rft.aufirst=Youngwan&amp;rft.au=Kim%2C+Jonghee&amp;rft.au=Willette%2C+Jeffrey&amp;rft.au=Hwang%2C+Sung+Ju&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent%2FCVPR2022%2Fpapers%2FLee_MPViT_Multi-Path_Vision_Transformer_for_Dense_Prediction_CVPR_2022_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-30"><span class="mw-cite-backlink"><b><a href="#cite_ref-30">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFXieGirshickDollarTu2017" class="citation conference cs1">Xie, Saining; Girshick, Ross; Dollar, Piotr; Tu, Zhuowen; <a href="/wiki/Kaiming_He" title="Kaiming He">He, Kaiming</a> (2017). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content_cvpr_2017/papers/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.pdf"><i>Aggregated Residual Transformations for Deep Neural Networks</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. pp.&#160;<span class="nowrap">1492–</span>1500. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1611.05431">1611.05431</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2017.634">10.1109/CVPR.2017.634</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Aggregated+Residual+Transformations+for+Deep+Neural+Networks&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1492-%3C%2Fspan%3E1500&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1611.05431&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2017.634&amp;rft.aulast=Xie&amp;rft.aufirst=Saining&amp;rft.au=Girshick%2C+Ross&amp;rft.au=Dollar%2C+Piotr&amp;rft.au=Tu%2C+Zhuowen&amp;rft.au=He%2C+Kaiming&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent_cvpr_2017%2Fpapers%2FXie_Aggregated_Residual_Transformations_CVPR_2017_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> <li id="cite_note-31"><span class="mw-cite-backlink"><b><a href="#cite_ref-31">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFHuShenSun2018" class="citation conference cs1">Hu, Jie; Shen, Li; Sun, Gang (2018). <a rel="nofollow" class="external text" href="https://openaccess.thecvf.com/content_cvpr_2018/papers/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.pdf"><i>Squeeze-and-Excitation Networks</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Conference_on_Computer_Vision_and_Pattern_Recognition" title="Conference on Computer Vision and Pattern Recognition">Conference on Computer Vision and Pattern Recognition</a>. pp.&#160;<span class="nowrap">7132–</span>7141. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1709.01507">1709.01507</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2018.00745">10.1109/CVPR.2018.00745</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Squeeze-and-Excitation+Networks&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E7132-%3C%2Fspan%3E7141&amp;rft.date=2018&amp;rft_id=info%3Aarxiv%2F1709.01507&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2018.00745&amp;rft.aulast=Hu&amp;rft.aufirst=Jie&amp;rft.au=Shen%2C+Li&amp;rft.au=Sun%2C+Gang&amp;rft_id=https%3A%2F%2Fopenaccess.thecvf.com%2Fcontent_cvpr_2018%2Fpapers%2FHu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3AResidual+neural+network" class="Z3988"></span></span> </li> </ol></div> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output 
.hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output 
.navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Artificial_intelligence_(AI)776" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" 
colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Artificial_intelligence_navbox" title="Template:Artificial intelligence navbox"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Artificial_intelligence_navbox" title="Template talk:Artificial intelligence navbox"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Artificial_intelligence_navbox" title="Special:EditPage/Template:Artificial intelligence navbox"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Artificial_intelligence_(AI)776" style="font-size:114%;margin:0 4em"><a 
href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence (AI)</a></div></th></tr><tr><td class="navbox-abovebelow" colspan="2"><div><a href="/wiki/History_of_artificial_intelligence" title="History of artificial intelligence">History</a> (<a href="/wiki/Timeline_of_artificial_intelligence" title="Timeline of artificial intelligence">timeline</a>)</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parameter" title="Parameter">Parameter</a> <ul><li><a href="/wiki/Hyperparameter_(machine_learning)" title="Hyperparameter (machine learning)">Hyperparameter</a></li></ul></li> <li><a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">Loss functions</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a> <ul><li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Double_descent" title="Double descent">Double descent</a></li> <li><a href="/wiki/Overfitting" title="Overfitting">Overfitting</a></li></ul></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Gradient_descent" title="Gradient descent">Gradient descent</a> <ul><li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a></li> <li><a href="/wiki/Quasi-Newton_method" title="Quasi-Newton method">Quasi-Newton method</a></li> <li><a href="/wiki/Conjugate_gradient_method" title="Conjugate gradient method">Conjugate gradient method</a></li></ul></li> <li><a href="/wiki/Backpropagation" title="Backpropagation">Backpropagation</a></li> <li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention</a></li> <li><a href="/wiki/Convolution" 
title="Convolution">Convolution</a></li> <li><a href="/wiki/Normalization_(machine_learning)" title="Normalization (machine learning)">Normalization</a> <ul><li><a href="/wiki/Batch_normalization" title="Batch normalization">Batchnorm</a></li></ul></li> <li><a href="/wiki/Activation_function" title="Activation function">Activation</a> <ul><li><a href="/wiki/Softmax_function" title="Softmax function">Softmax</a></li> <li><a href="/wiki/Sigmoid_function" title="Sigmoid function">Sigmoid</a></li> <li><a href="/wiki/Rectifier_(neural_networks)" title="Rectifier (neural networks)">Rectifier</a></li></ul></li> <li><a href="/wiki/Gating_mechanism" title="Gating mechanism">Gating</a></li> <li><a href="/wiki/Weight_initialization" title="Weight initialization">Weight initialization</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a></li> <li><a href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">Datasets</a> <ul><li><a href="/wiki/Data_augmentation" title="Data augmentation">Augmentation</a></li></ul></li> <li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Imitation_learning" title="Imitation learning">Imitation</a></li> <li><a href="/wiki/Policy_gradient_method" title="Policy gradient method">Policy gradient</a></li></ul></li> <li><a href="/wiki/Diffusion_process" title="Diffusion process">Diffusion</a></li> <li><a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">Latent diffusion model</a></li> <li><a href="/wiki/Autoregressive_model" title="Autoregressive 
model">Autoregression</a></li> <li><a href="/wiki/Adversarial_machine_learning" title="Adversarial machine learning">Adversary</a></li> <li><a href="/wiki/Retrieval-augmented_generation" title="Retrieval-augmented generation">RAG</a></li> <li><a href="/wiki/Uncanny_valley" title="Uncanny valley">Uncanny valley</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Recursive_self-improvement" title="Recursive self-improvement">Recursive self-improvement</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Applications</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> <ul><li><a href="/wiki/Prompt_engineering#In-context_learning" title="Prompt engineering">In-context learning</a></li></ul></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Artificial neural network</a> <ul><li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li></ul></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">NMT</a></li></ul></li> <li><a href="/wiki/Artificial_general_intelligence" title="Artificial general intelligence">Artificial general intelligence</a></li></ul> </div></td></tr><tr><th 
scope="row" class="navbox-group" style="width:1%">Implementations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Audio–visual</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li> <li><a href="/wiki/Human_image_synthesis" title="Human image synthesis">Human image synthesis</a></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">HWR</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a></li> <li><a href="/wiki/Deep_learning_speech_synthesis" title="Deep learning speech synthesis">Speech synthesis</a> <ul><li><a href="/wiki/15.ai" title="15.ai">15.ai</a></li> <li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li></ul></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a> <ul><li><a href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech recognition system)">Whisper</a></li></ul></li> <li><a href="/wiki/Facial_recognition_system" title="Facial recognition system">Facial recognition</a></li> <li><a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a></li> <li><a href="/wiki/Text-to-image_model" title="Text-to-image model">Text-to-image models</a> <ul><li><a href="/wiki/Aurora_(text-to-image_model)" class="mw-redirect" title="Aurora (text-to-image model)">Aurora</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a 
href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Google_Brain#Text-to-image_model" title="Google Brain">Imagen</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul></li> <li><a href="/wiki/Text-to-video_model" title="Text-to-video model">Text-to-video models</a> <ul><li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/Runway_(company)#Gen-3_Alpha" title="Runway (company)">Gen-3 Alpha</a></li> <li><a href="/wiki/MiniMax_(company)#Hailuo_AI" title="MiniMax (company)">Hailuo AI</a></li> <li><a href="/wiki/Kling_(text-to-video_model)" class="mw-redirect" title="Kling (text-to-video model)">Kling</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Google_DeepMind#Video_model" title="Google DeepMind">Veo</a></li></ul></li> <li><a href="/wiki/Music_and_artificial_intelligence" title="Music and artificial intelligence">Music generation</a> <ul><li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li> <li><a href="/wiki/Udio" title="Udio">Udio</a></li></ul></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/T5_(language_model)" title="T5 (language model)">T5</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a 
href="/wiki/Chinchilla_(language_model)" title="Chinchilla (language model)">Chinchilla AI</a></li> <li><a href="/wiki/PaLM" title="PaLM">PaLM</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a href="/wiki/GPT-2" title="GPT-2">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/Gemini_(language_model)" title="Gemini (language model)">Gemini</a> <ul><li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">chatbot</a></li></ul></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/LaMDA" title="LaMDA">LaMDA</a></li> <li><a href="/wiki/BLOOM_(language_model)" title="BLOOM (language model)">BLOOM</a></li> <li><a href="/wiki/Project_Debater" title="Project Debater">Project Debater</a></li> <li><a href="/wiki/IBM_Watson" title="IBM Watson">IBM Watson</a></li> <li><a href="/wiki/IBM_Watsonx" title="IBM Watsonx">IBM Watsonx</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li> <li><a href="/wiki/DeepSeek" title="DeepSeek">DeepSeek</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Decisional</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlphaGo" 
title="AlphaGo">AlphaGo</a></li> <li><a href="/wiki/AlphaZero" title="AlphaZero">AlphaZero</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a href="/wiki/Self-driving_car" title="Self-driving car">Self-driving car</a></li> <li><a href="/wiki/MuZero" title="MuZero">MuZero</a></li> <li><a href="/wiki/Action_selection" title="Action selection">Action selection</a> <ul><li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li></ul></li> <li><a href="/wiki/Robot_control" title="Robot control">Robot control</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a></li> <li><a href="/wiki/Warren_Sturgis_McCulloch" title="Warren Sturgis McCulloch">Warren Sturgis McCulloch</a></li> <li><a href="/wiki/Walter_Pitts" title="Walter Pitts">Walter Pitts</a></li> <li><a href="/wiki/John_von_Neumann" title="John von Neumann">John von Neumann</a></li> <li><a href="/wiki/Claude_Shannon" title="Claude Shannon">Claude Shannon</a></li> <li><a href="/wiki/Marvin_Minsky" title="Marvin Minsky">Marvin Minsky</a></li> <li><a href="/wiki/John_McCarthy_(computer_scientist)" title="John McCarthy (computer scientist)">John McCarthy</a></li> <li><a href="/wiki/Nathaniel_Rochester_(computer_scientist)" title="Nathaniel Rochester (computer scientist)">Nathaniel Rochester</a></li> <li><a href="/wiki/Allen_Newell" title="Allen Newell">Allen Newell</a></li> <li><a href="/wiki/Cliff_Shaw" title="Cliff Shaw">Cliff Shaw</a></li> <li><a href="/wiki/Herbert_A._Simon" title="Herbert A. Simon">Herbert A. 
Simon</a></li> <li><a href="/wiki/Oliver_Selfridge" title="Oliver Selfridge">Oliver Selfridge</a></li> <li><a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a></li> <li><a href="/wiki/Bernard_Widrow" title="Bernard Widrow">Bernard Widrow</a></li> <li><a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a></li> <li><a href="/wiki/Seymour_Papert" title="Seymour Papert">Seymour Papert</a></li> <li><a href="/wiki/Seppo_Linnainmaa" title="Seppo Linnainmaa">Seppo Linnainmaa</a></li> <li><a href="/wiki/Paul_Werbos" title="Paul Werbos">Paul Werbos</a></li> <li><a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a></li> <li><a href="/wiki/Yann_LeCun" title="Yann LeCun">Yann LeCun</a></li> <li><a href="/wiki/Geoffrey_Hinton" title="Geoffrey Hinton">Geoffrey Hinton</a></li> <li><a href="/wiki/John_Hopfield" title="John Hopfield">John Hopfield</a></li> <li><a href="/wiki/Yoshua_Bengio" title="Yoshua Bengio">Yoshua Bengio</a></li> <li><a href="/wiki/Lotfi_A._Zadeh" title="Lotfi A. Zadeh">Lotfi A. 
Zadeh</a></li> <li><a href="/wiki/Stephen_Grossberg" title="Stephen Grossberg">Stephen Grossberg</a></li> <li><a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a></li> <li><a href="/wiki/Andrew_Ng" title="Andrew Ng">Andrew Ng</a></li> <li><a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Fei-Fei Li</a></li> <li><a href="/wiki/Alex_Krizhevsky" title="Alex Krizhevsky">Alex Krizhevsky</a></li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a></li> <li><a href="/wiki/Demis_Hassabis" title="Demis Hassabis">Demis Hassabis</a></li> <li><a href="/wiki/David_Silver_(computer_scientist)" title="David Silver (computer scientist)">David Silver</a></li> <li><a href="/wiki/Ian_Goodfellow" title="Ian Goodfellow">Ian Goodfellow</a></li> <li><a href="/wiki/Andrej_Karpathy" title="Andrej Karpathy">Andrej Karpathy</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Architectures</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Neural_Turing_machine" title="Neural Turing machine">Neural Turing machine</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer (ViT)</a></li></ul></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network (RNN)</a></li> <li><a href="/wiki/Long_short-term_memory" title="Long short-term memory">Long short-term memory (LSTM)</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit (GRU)</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">Echo state 
network</a></li> <li><a href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">Multilayer perceptron (MLP)</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network (CNN)</a></li> <li><a class="mw-selflink selflink">Residual neural network (ResNet)</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder (VAE)</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network (GAN)</a></li> <li><a href="/wiki/Graph_neural_network" title="Graph neural network">Graph neural network (GNN)</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Symbol_portal_class.svg" class="mw-file-description" title="Portal"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/16px-Symbol_portal_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/23px-Symbol_portal_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/31px-Symbol_portal_class.svg.png 2x" data-file-width="180" data-file-height="185" /></a></span> Portals <ul><li><a href="/wiki/Portal:Technology" title="Portal:Technology">Technology</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" 
class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <a href="/wiki/Category:Artificial_intelligence" title="Category:Artificial intelligence">Category</a> <ul><li><a href="/wiki/Category:Artificial_neural_networks" title="Category:Artificial neural networks">Artificial neural networks</a></li> <li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="List-Class article"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/16px-Symbol_list_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/23px-Symbol_list_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/31px-Symbol_list_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> List <ul><li><a href="/wiki/List_of_artificial_intelligence_companies" title="List of artificial intelligence companies">Companies</a></li> <li><a href="/wiki/List_of_artificial_intelligence_projects" title="List of artificial intelligence projects">Projects</a></li></ul></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐84b999ff94‐rnblr Cached time: 20250204112209 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.617 seconds Real time usage: 0.807 seconds Preprocessor visited node count: 3196/1000000 Post‐expand include size: 106395/2097152 bytes Template argument size: 1023/2097152 bytes Highest expansion depth: 16/100 Expensive parser function count: 3/500 
Unstrip recursion depth: 1/20 Unstrip post‐expand size: 128703/5000000 bytes Lua time usage: 0.359/10.000 seconds Lua memory usage: 5742588/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 563.746 1 -total 29.75% 167.698 14 Template:Cite_conference 17.65% 99.473 1 Template:Artificial_intelligence_navbox 17.38% 97.998 2 Template:Navbox 15.10% 85.126 1 Template:Short_description 9.77% 55.051 2 Template:Pagetype 6.80% 38.340 6 Template:Cite_journal 6.73% 37.965 3 Template:Pg 5.44% 30.688 3 Template:R/superscript 4.82% 27.179 1 Template:Redirect --> <!-- Saved in parser cache with key enwiki:pcache:55867424:|#|:idhash:canonical and timestamp 20250204112209 and revision id 1268423920. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?useformat=desktop&amp;type=1x1&amp;usesul3=0" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Residual_neural_network&amp;oldid=1268423920">https://en.wikipedia.org/w/index.php?title=Residual_neural_network&amp;oldid=1268423920</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Neural_network_architectures" title="Category:Neural network architectures">Neural network architectures</a></li><li><a href="/wiki/Category:Deep_learning" title="Category:Deep learning">Deep learning</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with 
short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_matches_Wikidata" title="Category:Short description matches Wikidata">Short description matches Wikidata</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 9 January 2025, at 18:27<span class="anonymous-show">&#160;(UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Residual_neural_network&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" lang="en" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" 
width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-header-container vector-sticky-header-container"> <div id="vector-sticky-header" class="vector-sticky-header"> <div class="vector-sticky-header-start"> <div class="vector-sticky-header-icon-start vector-button-flush-left vector-button-flush-right" aria-hidden="true"> <button class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-sticky-header-search-toggle" tabindex="-1" data-event-name="ui.vector-sticky-search-form.icon"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </button> </div> <div role="search" class="vector-search-box-vue vector-search-box-show-thumbnail vector-search-box"> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail"> <form action="/w/index.php" id="vector-sticky-search-form" class="cdx-search-input cdx-search-input--has-end-button"> <div class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia"> <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <div class="vector-sticky-header-context-bar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-sticky-header-toc" class="vector-dropdown mw-portlet mw-portlet-sticky-header-toc vector-sticky-header-toc vector-button-flush-left" > <input type="checkbox" id="vector-sticky-header-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-sticky-header-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label 
id="vector-sticky-header-toc-label" for="vector-sticky-header-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-sticky-header-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div class="vector-sticky-header-context-bar-primary" aria-hidden="true" ><span class="mw-page-title-main">Residual neural network</span></div> </div> </div> <div class="vector-sticky-header-end" aria-hidden="true"> <div class="vector-sticky-header-icons"> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-talk-sticky-header" tabindex="-1" data-event-name="talk-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbles mw-ui-icon-wikimedia-speechBubbles"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-subject-sticky-header" tabindex="-1" data-event-name="subject-sticky-header"><span class="vector-icon mw-ui-icon-article mw-ui-icon-wikimedia-article"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-history-sticky-header" tabindex="-1" data-event-name="history-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-history mw-ui-icon-wikimedia-wikimedia-history"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only mw-watchlink" id="ca-watchstar-sticky-header" 
tabindex="-1" data-event-name="watch-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-star mw-ui-icon-wikimedia-wikimedia-star"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-edit-sticky-header" tabindex="-1" data-event-name="wikitext-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-wikiText mw-ui-icon-wikimedia-wikimedia-wikiText"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-ve-edit-sticky-header" tabindex="-1" data-event-name="ve-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-edit mw-ui-icon-wikimedia-wikimedia-edit"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-viewsource-sticky-header" tabindex="-1" data-event-name="ve-edit-protected-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-editLock mw-ui-icon-wikimedia-wikimedia-editLock"></span> <span></span> </a> </div> <div class="vector-sticky-header-buttons"> <button class="cdx-button cdx-button--weight-quiet mw-interlanguage-selector" id="p-lang-btn-sticky-header" tabindex="-1" data-event-name="ui.dropdown-p-lang-btn-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-language mw-ui-icon-wikimedia-wikimedia-language"></span> <span>9 languages</span> </button> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive" id="ca-addsection-sticky-header" tabindex="-1" data-event-name="addsection-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbleAdd-progressive mw-ui-icon-wikimedia-speechBubbleAdd-progressive"></span> <span>Add topic</span> </a> </div> <div 
class="vector-sticky-header-icon-end"> <div class="vector-user-links"> </div> </div> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-5fc7759dd8-wfpt2","wgBackendResponseTime":149,"wgPageParseReport":{"limitreport":{"cputime":"0.617","walltime":"0.807","ppvisitednodes":{"value":3196,"limit":1000000},"postexpandincludesize":{"value":106395,"limit":2097152},"templateargumentsize":{"value":1023,"limit":2097152},"expansiondepth":{"value":16,"limit":100},"expensivefunctioncount":{"value":3,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":128703,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 563.746 1 -total"," 29.75% 167.698 14 Template:Cite_conference"," 17.65% 99.473 1 Template:Artificial_intelligence_navbox"," 17.38% 97.998 2 Template:Navbox"," 15.10% 85.126 1 Template:Short_description"," 9.77% 55.051 2 Template:Pagetype"," 6.80% 38.340 6 Template:Cite_journal"," 6.73% 37.965 3 Template:Pg"," 5.44% 30.688 3 Template:R/superscript"," 4.82% 27.179 1 Template:Redirect"]},"scribunto":{"limitreport-timeusage":{"value":"0.359","limit":"10.000"},"limitreport-memusage":{"value":5742588,"limit":52428800}},"cachereport":{"origin":"mw-web.codfw.main-84b999ff94-rnblr","timestamp":"20250204112209","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Residual neural network","url":"https:\/\/en.wikipedia.org\/wiki\/Residual_neural_network","sameAs":"http:\/\/www.wikidata.org\/entity\/Q43744058","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q43744058","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, 
Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2017-11-23T06:14:45Z","dateModified":"2025-01-09T18:27:56Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/b\/ba\/ResBlock.png","headline":"type of artificial neural network"}</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10