<!-- CINXE.COM

Long short-term memory - Wikipedia -->

<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Long short-term memory - Wikipedia</title>
<!-- Inline bootstrap script. First statement: swaps the server-rendered "client-nojs" class list on the root element for the "client-js" variant, after applying any "*-clientpref-*" overrides found in the "enwikimwclientpreferences" cookie (comma-separated, stored as %2C). The same script then assigns the RLCONF, RLSTATE and RLPAGEMODULES globals. -->
<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());
/* Page configuration for this view (page name, revision ids, categories, feature flags).
   Every string value must stay on a single physical line: a raw line break inside a
   JavaScript string literal is a syntax error and would stop this whole inline script
   from running. */
RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"ef78f692-1eb2-4536-88d2-0ce83374dd3d","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Long_short-term_memory","wgTitle":"Long short-term memory","wgCurRevisionId":1280106018,"wgRevisionId":1280106018,"wgArticleId":10711453,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 maint: multiple names: authors list","Articles with short description","Short description is different from Wikidata","Wikipedia articles that are too technical from March 2022","All articles that are too technical","Neural network architectures","Deep learning"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Long_short-term_memory","wgRelevantArticleId":10711453,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":50000,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q6673524","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false}; 
RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","ext.math.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","mediawiki.page.media","ext.scribunto.logs","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=ext.cite.styles%7Cext.math.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&amp;only=styles&amp;skin=vector-2022"> <script async="" src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" 
content=""> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.20"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/1200px-LSTM_Cell.svg.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="820"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/800px-LSTM_Cell.svg.png"> <meta property="og:image:width" content="800"> <meta property="og:image:height" content="547"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/640px-LSTM_Cell.svg.png"> <meta property="og:image:width" content="640"> <meta property="og:image:height" content="438"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Long short-term memory - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Long_short-term_memory"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Long_short-term_memory&amp;action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Long_short-term_memory"> <link rel="license" 
href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom">
<!-- DNS prefetch hints. Each href must name a host (scheme-relative "//host"); a bare "login.wikimedia.org" would be resolved as a relative path on the current origin and the hint would never reach the login host. -->
<link rel="dns-prefetch" href="//meta.wikimedia.org"> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head>
<body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Long_short-term_memory rootpage-Long_short-term_memory skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a>
<!-- Site header. It opens with the "Main menu" dropdown: a checkbox toggle, its label, and the pinnable menu container. -->
<div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" title="Main menu" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" 
data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li 
id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li><li id="n-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages"><span>Special pages</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search 
cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page&#039;s font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance 
mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div>
<!-- Overflow user links: Donate, Create account, Log in. The "Log in" title attribute, including its trailing "[o]" access key hint, is kept on one line so the tooltip text contains no line break. -->
<div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/?wmf_source=donate&amp;wmf_medium=sidebar&amp;wmf_campaign=en.wikipedia.org&amp;uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&amp;returnto=Long+short-term+memory" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&amp;returnto=Long+short-term+memory" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div>
<!-- "Personal tools" dropdown: the same three links with icons, followed by the "Pages for logged out editors" menu (Contributions, Talk). -->
<div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/?wmf_source=donate&amp;wmf_medium=sidebar&amp;wmf_campaign=en.wikipedia.org&amp;uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&amp;returnto=Long+short-term+memory" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Long+short-term+memory" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div>
<!-- Page container: site notice slot, the pinned main menu container, then the "Contents" navigation. -->
<div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Motivation" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Motivation"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Motivation</span> </div> </a> <ul id="toc-Motivation-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Variants" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Variants"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Variants</span> </div> </a> <button aria-controls="toc-Variants-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Variants subsection</span> </button> <ul id="toc-Variants-sublist" class="vector-toc-list"> <li id="toc-LSTM_with_a_forget_gate" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#LSTM_with_a_forget_gate"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>LSTM with a forget gate</span> </div> </a> <ul id="toc-LSTM_with_a_forget_gate-sublist" class="vector-toc-list"> <li id="toc-Variables" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Variables"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1.1</span> <span>Variables</span> </div> </a> <ul id="toc-Variables-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Activation_functions" 
class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Activation_functions"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1.2</span> <span>Activation functions</span> </div> </a> <ul id="toc-Activation_functions-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Peephole_LSTM" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Peephole_LSTM"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.2</span> <span>Peephole LSTM</span> </div> </a> <ul id="toc-Peephole_LSTM-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Peephole_convolutional_LSTM" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Peephole_convolutional_LSTM"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.3</span> <span>Peephole convolutional LSTM</span> </div> </a> <ul id="toc-Peephole_convolutional_LSTM-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Training" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Training"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Training</span> </div> </a> <button aria-controls="toc-Training-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Training subsection</span> </button> <ul id="toc-Training-sublist" class="vector-toc-list"> <li id="toc-CTC_score_function" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#CTC_score_function"> <div class="vector-toc-text"> <span class="vector-toc-numb">3.1</span> <span>CTC score function</span> </div> </a> <ul id="toc-CTC_score_function-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Alternatives" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Alternatives"> 
<div class="vector-toc-text"> <span class="vector-toc-numb">3.2</span> <span>Alternatives</span> </div> </a> <ul id="toc-Alternatives-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Applications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Applications"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Applications</span> </div> </a> <ul id="toc-Applications-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>History</span> </div> </a> <button aria-controls="toc-History-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle History subsection</span> </button> <ul id="toc-History-sublist" class="vector-toc-list"> <li id="toc-Development" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Development"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.1</span> <span>Development</span> </div> </a> <ul id="toc-Development-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Development_of_variants" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Development_of_variants"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.2</span> <span>Development of variants</span> </div> </a> <ul id="toc-Development_of_variants-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Applications_2" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Applications_2"> <div class="vector-toc-text"> <span class="vector-toc-numb">5.3</span> <span>Applications</span> </div> </a> <ul id="toc-Applications_2-sublist" 
class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Further_reading" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Further_reading"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>Further reading</span> </div> </a> <ul id="toc-Further_reading-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" title="Table of Contents" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label 
id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav>
<!-- Page title. -->
<h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Long short-term memory</span></h1>
<!-- Interlanguage dropdown (22 languages). The checkbox aria-label is kept on one line so the accessible name contains no embedded line break. -->
<div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 22 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-22" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">22 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ar mw-list-item"><a href="https://ar.wikipedia.org/wiki/%D8%B0%D8%A7%D9%83%D8%B1%D8%A9_%D9%82%D8%B5%D9%8A%D8%B1%D8%A9_%D8%A7%D9%84%D9%85%D8%AF%D9%89_%D9%85%D8%B7%D9%88%D9%84%D8%A9" title="ذاكرة قصيرة المدى مطولة – Arabic" lang="ar" hreflang="ar" data-title="ذاكرة قصيرة المدى مطولة" data-language-autonym="العربية" data-language-local-name="Arabic" class="interlanguage-link-target"><span>العربية</span></a></li><li class="interlanguage-link interwiki-zh-min-nan mw-list-item"><a href="https://zh-min-nan.wikipedia.org/wiki/Tn%CC%82g_t%C3%A9-k%C3%AE_k%C3%AC-ek" title="Tn̂g té-kî kì-ek – Minnan" lang="nan" hreflang="nan" data-title="Tn̂g té-kî kì-ek" data-language-autonym="閩南語 / Bân-lâm-gú" data-language-local-name="Minnan" class="interlanguage-link-target"><span>閩南語 / Bân-lâm-gú</span></a></li><li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Long_short-term_memory" title="Long short-term memory – Catalan" lang="ca" hreflang="ca" data-title="Long short-term memory" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-cs mw-list-item"><a href="https://cs.wikipedia.org/wiki/LSTM" title="LSTM – Czech" lang="cs" hreflang="cs" data-title="LSTM" data-language-autonym="Čeština" data-language-local-name="Czech" 
class="interlanguage-link-target"><span>Čeština</span></a></li><li class="interlanguage-link interwiki-de mw-list-item"><a href="https://de.wikipedia.org/wiki/Long_short-term_memory" title="Long short-term memory – German" lang="de" hreflang="de" data-title="Long short-term memory" data-language-autonym="Deutsch" data-language-local-name="German" class="interlanguage-link-target"><span>Deutsch</span></a></li><li class="interlanguage-link interwiki-el mw-list-item"><a href="https://el.wikipedia.org/wiki/%CE%94%CE%AF%CE%BA%CF%84%CF%85%CE%B1_%CE%9C%CE%B1%CE%BA%CF%81%CE%AC%CF%82_%CE%92%CF%81%CE%B1%CF%87%CF%8D%CF%87%CF%81%CE%BF%CE%BD%CE%B7%CF%82_%CE%9C%CE%BD%CE%AE%CE%BC%CE%B7%CF%82" title="Δίκτυα Μακράς Βραχύχρονης Μνήμης – Greek" lang="el" hreflang="el" data-title="Δίκτυα Μακράς Βραχύχρονης Μνήμης" data-language-autonym="Ελληνικά" data-language-local-name="Greek" class="interlanguage-link-target"><span>Ελληνικά</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Memoria_larga_a_corto_plazo" title="Memoria larga a corto plazo – Spanish" lang="es" hreflang="es" data-title="Memoria larga a corto plazo" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%AD%D8%A7%D9%81%D8%B8%D9%87_%D8%B7%D9%88%D9%84%D8%A7%D9%86%DB%8C_%DA%A9%D9%88%D8%AA%D8%A7%D9%87_%D9%85%D8%AF%D8%AA" title="حافظه طولانی کوتاه مدت – Persian" lang="fa" hreflang="fa" data-title="حافظه طولانی کوتاه مدت" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-gl mw-list-item"><a href="https://gl.wikipedia.org/wiki/LSTM" title="LSTM – Galician" lang="gl" hreflang="gl" data-title="LSTM" data-language-autonym="Galego" data-language-local-name="Galician" 
class="interlanguage-link-target"><span>Galego</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EC%9E%A5%EB%8B%A8%EA%B8%B0_%EB%A9%94%EB%AA%A8%EB%A6%AC" title="장단기 메모리 – Korean" lang="ko" hreflang="ko" data-title="장단기 메모리" data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-lv mw-list-item"><a href="https://lv.wikipedia.org/wiki/LSTM" title="LSTM – Latvian" lang="lv" hreflang="lv" data-title="LSTM" data-language-autonym="Latviešu" data-language-local-name="Latvian" class="interlanguage-link-target"><span>Latviešu</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/%E9%95%B7%E3%83%BB%E7%9F%AD%E6%9C%9F%E8%A8%98%E6%86%B6" title="長・短期記憶 – Japanese" lang="ja" hreflang="ja" data-title="長・短期記憶" data-language-autonym="日本語" data-language-local-name="Japanese" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/Long_short-term_memory" title="Long short-term memory – Portuguese" lang="pt" hreflang="pt" data-title="Long short-term memory" data-language-autonym="Português" data-language-local-name="Portuguese" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/%D0%94%D0%BE%D0%BB%D0%B3%D0%B0%D1%8F_%D0%BA%D1%80%D0%B0%D1%82%D0%BA%D0%BE%D1%81%D1%80%D0%BE%D1%87%D0%BD%D0%B0%D1%8F_%D0%BF%D0%B0%D0%BC%D1%8F%D1%82%D1%8C" title="Долгая краткосрочная память – Russian" lang="ru" hreflang="ru" data-title="Долгая краткосрочная память" data-language-autonym="Русский" data-language-local-name="Russian" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-sr mw-list-item"><a 
href="https://sr.wikipedia.org/wiki/%D0%94%D1%83%D0%B3%D0%B0_%D0%BA%D1%80%D0%B0%D1%82%D0%BA%D0%BE%D1%80%D0%BE%D1%87%D0%BD%D0%B0_%D0%BC%D0%B5%D0%BC%D0%BE%D1%80%D0%B8%D1%98%D0%B0" title="Дуга краткорочна меморија – Serbian" lang="sr" hreflang="sr" data-title="Дуга краткорочна меморија" data-language-autonym="Српски / srpski" data-language-local-name="Serbian" class="interlanguage-link-target"><span>Српски / srpski</span></a></li><li class="interlanguage-link interwiki-th mw-list-item"><a href="https://th.wikipedia.org/wiki/%E0%B8%AB%E0%B8%99%E0%B9%88%E0%B8%A7%E0%B8%A2%E0%B8%84%E0%B8%A7%E0%B8%B2%E0%B8%A1%E0%B8%88%E0%B8%B3%E0%B8%A3%E0%B8%B0%E0%B8%A2%E0%B8%B0%E0%B8%AA%E0%B8%B1%E0%B9%89%E0%B8%99%E0%B9%81%E0%B8%9A%E0%B8%9A%E0%B8%A2%E0%B8%B2%E0%B8%A7" title="หน่วยความจำระยะสั้นแบบยาว – Thai" lang="th" hreflang="th" data-title="หน่วยความจำระยะสั้นแบบยาว" data-language-autonym="ไทย" data-language-local-name="Thai" class="interlanguage-link-target"><span>ไทย</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/Uzun_k%C4%B1sa_s%C3%BCreli_bellek" title="Uzun kısa süreli bellek – Turkish" lang="tr" hreflang="tr" data-title="Uzun kısa süreli bellek" data-language-autonym="Türkçe" data-language-local-name="Turkish" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%94%D0%BE%D0%B2%D0%B3%D0%B0_%D0%BA%D0%BE%D1%80%D0%BE%D1%82%D0%BA%D0%BE%D1%87%D0%B0%D1%81%D0%BD%D0%B0_%D0%BF%D0%B0%D0%BC%27%D1%8F%D1%82%D1%8C" title="Довга короткочасна пам&#039;ять – Ukrainian" lang="uk" hreflang="uk" data-title="Довга короткочасна пам&#039;ять" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-vi mw-list-item"><a href="https://vi.wikipedia.org/wiki/B%E1%BB%99_nh%E1%BB%9B_d%C3%A0i-ng%E1%BA%AFn_h%E1%BA%A1n" 
title="Bộ nhớ dài-ngắn hạn – Vietnamese" lang="vi" hreflang="vi" data-title="Bộ nhớ dài-ngắn hạn" data-language-autonym="Tiếng Việt" data-language-local-name="Vietnamese" class="interlanguage-link-target"><span>Tiếng Việt</span></a></li><li class="interlanguage-link interwiki-wuu mw-list-item"><a href="https://wuu.wikipedia.org/wiki/%E9%95%BF%E7%9F%AD%E6%9C%9F%E8%AE%B0%E5%BF%86" title="长短期记忆 – Wu" lang="wuu" hreflang="wuu" data-title="长短期记忆" data-language-autonym="吴语" data-language-local-name="Wu" class="interlanguage-link-target"><span>吴语</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E9%95%B7%E7%9F%AD%E6%9C%9F%E8%A8%98%E6%86%B6" title="長短期記憶 – Cantonese" lang="yue" hreflang="yue" data-title="長短期記憶" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E9%95%B7%E7%9F%AD%E6%9C%9F%E8%A8%98%E6%86%B6" title="長短期記憶 – Chinese" lang="zh" hreflang="zh" data-title="長短期記憶" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q6673524#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Long_short-term_memory" title="View the content page [c]" 
accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Long_short-term_memory" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Long_short-term_memory"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div 
id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Long_short-term_memory"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a 
href="/w/index.php?title=Long_short-term_memory&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Long_short-term_memory" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Long_short-term_memory" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;oldid=1280106018" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Long_short-term_memory&amp;id=1280106018&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FLong_short-term_memory"><span>Get shortened 
URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FLong_short-term_memory"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Long_short-term_memory&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Long_short-term_memory&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q6673524" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div 
class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Type of recurrent neural network architecture</div> <style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">"LSTM" redirects here. 
For other uses, see <a href="/wiki/LSTM_(disambiguation)" class="mw-disambig" title="LSTM (disambiguation)">LSTM (disambiguation)</a>.</div> <style data-mw-deduplicate="TemplateStyles:r1251242444">.mw-parser-output .ambox{border:1px solid #a2a9b1;border-left:10px solid #36c;background-color:#fbfbfb;box-sizing:border-box}.mw-parser-output .ambox+link+.ambox,.mw-parser-output .ambox+link+style+.ambox,.mw-parser-output .ambox+link+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+style+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+link+.ambox{margin-top:-1px}html body.mediawiki .mw-parser-output .ambox.mbox-small-left{margin:4px 1em 4px 0;overflow:hidden;width:238px;border-collapse:collapse;font-size:88%;line-height:1.25em}.mw-parser-output .ambox-speedy{border-left:10px solid #b32424;background-color:#fee7e6}.mw-parser-output .ambox-delete{border-left:10px solid #b32424}.mw-parser-output .ambox-content{border-left:10px solid #f28500}.mw-parser-output .ambox-style{border-left:10px solid #fc3}.mw-parser-output .ambox-move{border-left:10px solid #9932cc}.mw-parser-output .ambox-protection{border-left:10px solid #a2a9b1}.mw-parser-output .ambox .mbox-text{border:none;padding:0.25em 0.5em;width:100%}.mw-parser-output .ambox .mbox-image{border:none;padding:2px 0 2px 0.5em;text-align:center}.mw-parser-output .ambox .mbox-imageright{border:none;padding:2px 0.5em 2px 0;text-align:center}.mw-parser-output .ambox .mbox-empty-cell{border:none;padding:0;width:1px}.mw-parser-output .ambox .mbox-image-div{width:52px}@media(min-width:720px){.mw-parser-output .ambox{margin:0 10%}}@media print{body.ns-0 .mw-parser-output .ambox{display:none!important}}</style><table class="box-Technical plainlinks metadata ambox ambox-style ambox-technical" role="presentation"><tbody><tr><td class="mbox-image"><div class="mbox-image-div"><span typeof="mw:File"><span><img alt="" 
src="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/40px-Edit-clear.svg.png" decoding="async" width="40" height="40" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/60px-Edit-clear.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/f/f2/Edit-clear.svg/80px-Edit-clear.svg.png 2x" data-file-width="48" data-file-height="48" /></span></span></div></td><td class="mbox-text"><div class="mbox-text-span">This article <b>may be too technical for most readers to understand</b>.<span class="hide-when-compact"> Please <a class="external text" href="https://en.wikipedia.org/w/index.php?title=Long_short-term_memory&amp;action=edit">help improve it</a> to <a href="/wiki/Wikipedia:Make_technical_articles_understandable" title="Wikipedia:Make technical articles understandable">make it understandable to non-experts</a>, without removing the technical details.</span> <span class="date-container"><i>(<span class="date">March 2022</span>)</i></span><span class="hide-when-compact"><i> (<small><a href="/wiki/Help:Maintenance_template_removal" title="Help:Maintenance template removal">Learn how and when to remove this message</a></small>)</i></span></div></td></tr></tbody></table> <style data-mw-deduplicate="TemplateStyles:r1244144826">.mw-parser-output .machine-learning-list-title{background-color:#ddddff}html.skin-theme-clientpref-night .mw-parser-output .machine-learning-list-title{background-color:#222}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .machine-learning-list-title{background-color:#222}}</style> <style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline 
ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li 
ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1246091330">.mw-parser-output .sidebar{width:22em;float:right;clear:right;margin:0.5em 0 1em 1em;background:var(--background-color-neutral-subtle,#f8f9fa);border:1px solid var(--border-color-base,#a2a9b1);padding:0.2em;text-align:center;line-height:1.4em;font-size:88%;border-collapse:collapse;display:table}body.skin-minerva .mw-parser-output .sidebar{display:table!important;float:right!important;margin:0.5em 0 1em 1em!important}.mw-parser-output .sidebar-subgroup{width:100%;margin:0;border-spacing:0}.mw-parser-output .sidebar-left{float:left;clear:left;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-none{float:none;clear:both;margin:0.5em 1em 1em 0}.mw-parser-output .sidebar-outer-title{padding:0 0.4em 0.2em;font-size:125%;line-height:1.2em;font-weight:bold}.mw-parser-output .sidebar-top-image{padding:0.4em}.mw-parser-output .sidebar-top-caption,.mw-parser-output .sidebar-pretitle-with-top-image,.mw-parser-output .sidebar-caption{padding:0.2em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-pretitle{padding:0.4em 0.4em 0;line-height:1.2em}.mw-parser-output .sidebar-title,.mw-parser-output .sidebar-title-with-pretitle{padding:0.2em 0.8em;font-size:145%;line-height:1.2em}.mw-parser-output .sidebar-title-with-pretitle{padding:0.1em 0.4em}.mw-parser-output .sidebar-image{padding:0.2em 0.4em 0.4em}.mw-parser-output .sidebar-heading{padding:0.1em 0.4em}.mw-parser-output .sidebar-content{padding:0 0.5em 0.4em}.mw-parser-output .sidebar-content-with-subgroup{padding:0.1em 0.4em 0.2em}.mw-parser-output .sidebar-above,.mw-parser-output .sidebar-below{padding:0.3em 0.8em;font-weight:bold}.mw-parser-output .sidebar-collapse .sidebar-above,.mw-parser-output .sidebar-collapse .sidebar-below{border-top:1px solid #aaa;border-bottom:1px solid #aaa}.mw-parser-output .sidebar-navbar{text-align:right;font-size:115%;padding:0 0.4em 0.4em}.mw-parser-output 
.sidebar-list-title{padding:0 0.4em;text-align:left;font-weight:bold;line-height:1.6em;font-size:105%}.mw-parser-output .sidebar-list-title-c{padding:0 0.4em;text-align:center;margin:0 3.3em}@media(max-width:640px){body.mediawiki .mw-parser-output .sidebar{width:100%!important;clear:both;float:none!important;margin-left:0!important;margin-right:0!important}}body.skin--responsive .mw-parser-output .sidebar a>img{max-width:none!important}@media screen{html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-night .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-list-title,html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle{background:transparent!important}html.skin-theme-clientpref-os .mw-parser-output .sidebar:not(.notheme) .sidebar-title-with-pretitle a{color:var(--color-progressive)!important}}@media print{body.ns-0 .mw-parser-output .sidebar{display:none!important}}</style><style data-mw-deduplicate="TemplateStyles:r886047488">.mw-parser-output .nobold{font-weight:normal}</style><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r886047488" /><table class="sidebar sidebar-collapse nomobile nowraplinks"><tbody><tr><td class="sidebar-pretitle">Part of a series on</td></tr><tr><th class="sidebar-title-with-pretitle"><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a><br />and <a href="/wiki/Data_mining" title="Data mining">data mining</a></th></tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div 
class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Paradigms</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a></li> <li><a href="/wiki/Unsupervised_learning" title="Unsupervised learning">Unsupervised learning</a></li> <li><a href="/wiki/Semi-supervised_learning" class="mw-redirect" title="Semi-supervised learning">Semi-supervised learning</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></li> <li><a href="/wiki/Meta-learning_(computer_science)" title="Meta-learning (computer science)">Meta-learning</a></li> <li><a href="/wiki/Online_machine_learning" title="Online machine learning">Online learning</a></li> <li><a href="/wiki/Batch_learning" class="mw-redirect" title="Batch learning">Batch learning</a></li> <li><a href="/wiki/Curriculum_learning" title="Curriculum learning">Curriculum learning</a></li> <li><a href="/wiki/Rule-based_machine_learning" title="Rule-based machine learning">Rule-based learning</a></li> <li><a href="/wiki/Neuro-symbolic_AI" title="Neuro-symbolic AI">Neuro-symbolic AI</a></li> <li><a href="/wiki/Neuromorphic_engineering" class="mw-redirect" title="Neuromorphic engineering">Neuromorphic engineering</a></li> <li><a href="/wiki/Quantum_machine_learning" title="Quantum machine learning">Quantum machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Problems</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Statistical_classification" 
title="Statistical classification">Classification</a></li> <li><a href="/wiki/Generative_model" title="Generative model">Generative modeling</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a></li> <li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></li> <li><a href="/wiki/Density_estimation" title="Density estimation">Density estimation</a></li> <li><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></li> <li><a href="/wiki/Data_cleaning" class="mw-redirect" title="Data cleaning">Data cleaning</a></li> <li><a href="/wiki/Automated_machine_learning" title="Automated machine learning">AutoML</a></li> <li><a href="/wiki/Association_rule_learning" title="Association rule learning">Association rules</a></li> <li><a href="/wiki/Semantic_analysis_(machine_learning)" title="Semantic analysis (machine learning)">Semantic analysis</a></li> <li><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></li> <li><a href="/wiki/Feature_engineering" title="Feature engineering">Feature engineering</a></li> <li><a href="/wiki/Feature_learning" title="Feature learning">Feature learning</a></li> <li><a href="/wiki/Learning_to_rank" title="Learning to rank">Learning to rank</a></li> <li><a href="/wiki/Grammar_induction" title="Grammar induction">Grammar induction</a></li> <li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li> <li><a href="/wiki/Multimodal_learning" title="Multimodal learning">Multimodal learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><div style="display: inline-block; 
line-height: 1.2em; padding: .1em 0;"><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a><br /><span class="nobold"><span style="font-size:85%;">(<b><a href="/wiki/Statistical_classification" title="Statistical classification">classification</a></b>&#160;&#8226;&#32;<b><a href="/wiki/Regression_analysis" title="Regression analysis">regression</a></b>)</span></span> </div></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Apprenticeship_learning" title="Apprenticeship learning">Apprenticeship learning</a></li> <li><a href="/wiki/Decision_tree_learning" title="Decision tree learning">Decision trees</a></li> <li><a href="/wiki/Ensemble_learning" title="Ensemble learning">Ensembles</a> <ul><li><a href="/wiki/Bootstrap_aggregating" title="Bootstrap aggregating">Bagging</a></li> <li><a href="/wiki/Boosting_(machine_learning)" title="Boosting (machine learning)">Boosting</a></li> <li><a href="/wiki/Random_forest" title="Random forest">Random forest</a></li></ul></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Linear_regression" title="Linear regression">Linear regression</a></li> <li><a href="/wiki/Naive_Bayes_classifier" title="Naive Bayes classifier">Naive Bayes</a></li> <li><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural networks</a></li> <li><a href="/wiki/Logistic_regression" title="Logistic regression">Logistic regression</a></li> <li><a href="/wiki/Perceptron" title="Perceptron">Perceptron</a></li> <li><a href="/wiki/Relevance_vector_machine" title="Relevance vector machine">Relevance vector machine (RVM)</a></li> <li><a href="/wiki/Support_vector_machine" title="Support vector machine">Support vector machine (SVM)</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed 
machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/BIRCH" title="BIRCH">BIRCH</a></li> <li><a href="/wiki/CURE_algorithm" title="CURE algorithm">CURE</a></li> <li><a href="/wiki/Hierarchical_clustering" title="Hierarchical clustering">Hierarchical</a></li> <li><a href="/wiki/K-means_clustering" title="K-means clustering"><i>k</i>-means</a></li> <li><a href="/wiki/Fuzzy_clustering" title="Fuzzy clustering">Fuzzy</a></li> <li><a href="/wiki/Expectation%E2%80%93maximization_algorithm" title="Expectation–maximization algorithm">Expectation–maximization (EM)</a></li> <li><br /><a href="/wiki/DBSCAN" title="DBSCAN">DBSCAN</a></li> <li><a href="/wiki/OPTICS_algorithm" title="OPTICS algorithm">OPTICS</a></li> <li><a href="/wiki/Mean_shift" title="Mean shift">Mean shift</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Dimensionality_reduction" title="Dimensionality reduction">Dimensionality reduction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Factor_analysis" title="Factor analysis">Factor analysis</a></li> <li><a href="/wiki/Canonical_correlation" title="Canonical correlation">CCA</a></li> <li><a href="/wiki/Independent_component_analysis" title="Independent component analysis">ICA</a></li> <li><a href="/wiki/Linear_discriminant_analysis" title="Linear discriminant analysis">LDA</a></li> <li><a href="/wiki/Non-negative_matrix_factorization" title="Non-negative matrix factorization">NMF</a></li> <li><a href="/wiki/Principal_component_analysis" 
title="Principal component analysis">PCA</a></li> <li><a href="/wiki/Proper_generalized_decomposition" title="Proper generalized decomposition">PGD</a></li> <li><a href="/wiki/T-distributed_stochastic_neighbor_embedding" title="T-distributed stochastic neighbor embedding">t-SNE</a></li> <li><a href="/wiki/Sparse_dictionary_learning" title="Sparse dictionary learning">SDL</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Structured_prediction" title="Structured prediction">Structured prediction</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Graphical_model" title="Graphical model">Graphical models</a> <ul><li><a href="/wiki/Bayesian_network" title="Bayesian network">Bayes net</a></li> <li><a href="/wiki/Conditional_random_field" title="Conditional random field">Conditional random field</a></li> <li><a href="/wiki/Hidden_Markov_model" title="Hidden Markov model">Hidden Markov</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Random_sample_consensus" title="Random sample consensus">RANSAC</a></li> <li><a href="/wiki/K-nearest_neighbors_algorithm" title="K-nearest neighbors algorithm"><i>k</i>-NN</a></li> <li><a href="/wiki/Local_outlier_factor" title="Local outlier factor">Local outlier factor</a></li> <li><a href="/wiki/Isolation_forest" title="Isolation forest">Isolation 
forest</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural network</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">Feedforward neural network</a></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network</a> <ul><li><a class="mw-selflink selflink">LSTM</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">GRU</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">ESN</a></li> <li><a href="/wiki/Reservoir_computing" title="Reservoir computing">reservoir computing</a></li></ul></li> <li><a href="/wiki/Boltzmann_machine" title="Boltzmann machine">Boltzmann machine</a> <ul><li><a href="/wiki/Restricted_Boltzmann_machine" title="Restricted Boltzmann machine">Restricted</a></li></ul></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">GAN</a></li> <li><a href="/wiki/Diffusion_model" title="Diffusion model">Diffusion model</a></li> <li><a href="/wiki/Self-organizing_map" title="Self-organizing map">SOM</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network</a> <ul><li><a href="/wiki/U-Net" title="U-Net">U-Net</a></li> <li><a href="/wiki/LeNet" title="LeNet">LeNet</a></li> <li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/DeepDream" 
title="DeepDream">DeepDream</a></li></ul></li> <li><a href="/wiki/Neural_radiance_field" title="Neural radiance field">Neural radiance field</a></li> <li><a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision</a></li></ul></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Spiking_neural_network" title="Spiking neural network">Spiking neural network</a></li> <li><a href="/wiki/Memtransistor" title="Memtransistor">Memtransistor</a></li> <li><a href="/wiki/Electrochemical_RAM" title="Electrochemical RAM">Electrochemical RAM</a> (ECRAM)</li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)"><a href="/wiki/Reinforcement_learning" title="Reinforcement learning">Reinforcement learning</a></div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Temporal_difference_learning" title="Temporal difference learning">Temporal difference (TD)</a></li> <li><a href="/wiki/Multi-agent_reinforcement_learning" title="Multi-agent reinforcement learning">Multi-agent</a> <ul><li><a href="/wiki/Self-play_(reinforcement_learning_technique)" class="mw-redirect" title="Self-play (reinforcement learning technique)">Self-play</a></li></ul></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" 
style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Learning with humans</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Active_learning_(machine_learning)" title="Active learning (machine learning)">Active learning</a></li> <li><a href="/wiki/Crowdsourcing" title="Crowdsourcing">Crowdsourcing</a></li> <li><a href="/wiki/Human-in-the-loop" title="Human-in-the-loop">Human-in-the-loop</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Model diagnostics</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Coefficient_of_determination" title="Coefficient of determination">Coefficient of determination</a></li> <li><a href="/wiki/Confusion_matrix" title="Confusion matrix">Confusion matrix</a></li> <li><a href="/wiki/Learning_curve_(machine_learning)" title="Learning curve (machine learning)">Learning curve</a></li> <li><a href="/wiki/Receiver_operating_characteristic" title="Receiver operating characteristic">ROC curve</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Mathematical foundations</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Kernel_machines" class="mw-redirect" title="Kernel machines">Kernel machines</a></li> <li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a 
href="/wiki/Computational_learning_theory" title="Computational learning theory">Computational learning theory</a></li> <li><a href="/wiki/Empirical_risk_minimization" title="Empirical risk minimization">Empirical risk minimization</a></li> <li><a href="/wiki/Occam_learning" title="Occam learning">Occam learning</a></li> <li><a href="/wiki/Probably_approximately_correct_learning" title="Probably approximately correct learning">PAC learning</a></li> <li><a href="/wiki/Statistical_learning_theory" title="Statistical learning theory">Statistical learning</a></li> <li><a href="/wiki/Vapnik%E2%80%93Chervonenkis_theory" title="Vapnik–Chervonenkis theory">VC theory</a></li> <li><a href="/wiki/Topological_deep_learning" title="Topological deep learning">Topological deep learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Journals and conferences</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/ECML_PKDD" title="ECML PKDD">ECML PKDD</a></li> <li><a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural Information Processing Systems">NeurIPS</a></li> <li><a href="/wiki/International_Conference_on_Machine_Learning" title="International Conference on Machine Learning">ICML</a></li> <li><a href="/wiki/International_Conference_on_Learning_Representations" title="International Conference on Learning Representations">ICLR</a></li> <li><a href="/wiki/International_Joint_Conference_on_Artificial_Intelligence" title="International Joint Conference on Artificial Intelligence">IJCAI</a></li> <li><a href="/wiki/Machine_Learning_(journal)" title="Machine Learning (journal)">ML</a></li> <li><a href="/wiki/Journal_of_Machine_Learning_Research" title="Journal of Machine Learning 
Research">JMLR</a></li></ul></div></div></td> </tr><tr><td class="sidebar-content"> <div class="sidebar-list mw-collapsible mw-collapsed machine-learning-list-title"><div class="sidebar-list-title" style="border-top:1px solid #aaa; text-align:center;;color: var(--color-base)">Related articles</div><div class="sidebar-list-content mw-collapsible-content hlist"> <ul><li><a href="/wiki/Glossary_of_artificial_intelligence" title="Glossary of artificial intelligence">Glossary of artificial intelligence</a></li> <li><a href="/wiki/List_of_datasets_for_machine-learning_research" title="List of datasets for machine-learning research">List of datasets for machine-learning research</a> <ul><li><a href="/wiki/List_of_datasets_in_computer_vision_and_image_processing" title="List of datasets in computer vision and image processing">List of datasets in computer vision and image processing</a></li></ul></li> <li><a href="/wiki/Outline_of_machine_learning" title="Outline of machine learning">Outline of machine learning</a></li></ul></div></div></td> </tr><tr><td class="sidebar-navbar"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output 
.navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Machine_learning" title="Template:Machine learning"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Machine_learning" title="Template talk:Machine learning"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Machine_learning" title="Special:EditPage/Template:Machine learning"><abbr title="Edit this template">e</abbr></a></li></ul></div></td></tr></tbody></table> <figure class="mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:LSTM_Cell.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/300px-LSTM_Cell.svg.png" decoding="async" width="300" height="205" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/450px-LSTM_Cell.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/600px-LSTM_Cell.svg.png 2x" data-file-width="512" data-file-height="350" /></a><figcaption>The Long Short-Term Memory (LSTM) cell can process data sequentially and keep its hidden state through time.</figcaption></figure> <p><b>Long short-term memory</b> (<b>LSTM</b>)<sup id="cite_ref-lstm1997_1-0" class="reference"><a href="#cite_note-lstm1997-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> is a type of <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">recurrent neural network</a> (RNN) aimed at mitigating the <a 
href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a><sup id="cite_ref-hochreiter1991_2-0" class="reference"><a href="#cite_note-hochreiter1991-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup> commonly encountered by traditional RNNs. Its relative insensitivity to gap length is its advantage over other RNNs, <a href="/wiki/Hidden_Markov_models" class="mw-redirect" title="Hidden Markov models">hidden Markov models</a>, and other sequence learning methods. It aims to provide a short-term memory for RNNs that can last thousands of timesteps (thus "<i>long</i> short-term memory").<sup id="cite_ref-lstm1997_1-1" class="reference"><a href="#cite_note-lstm1997-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> The name is made in analogy with <a href="/wiki/Long-term_memory" title="Long-term memory">long-term memory</a> and <a href="/wiki/Short-term_memory" title="Short-term memory">short-term memory</a> and their relationship, studied by cognitive psychologists since the early 20th century. </p><p>An LSTM unit is typically composed of a cell and three <a href="/wiki/Gating_mechanism" title="Gating mechanism">gates</a>: an input gate, an output gate,<sup id="cite_ref-hochreiter1996_3-0" class="reference"><a href="#cite_note-hochreiter1996-3"><span class="cite-bracket">&#91;</span>3<span class="cite-bracket">&#93;</span></a></sup> and a forget gate.<sup id="cite_ref-lstm2000_4-0" class="reference"><a href="#cite_note-lstm2000-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> The cell remembers values over arbitrary time intervals, and the gates regulate the flow of information into and out of the cell. Forget gates decide what information to discard from the previous state, by mapping the previous state and the current input to a value between 0 and 1. 
A (rounded) value of 1 signifies retention of the information, and a value of 0 represents discarding. Input gates decide which pieces of new information to store in the current cell state, using the same system as forget gates. Output gates control which pieces of information in the current cell state to output, by assigning a value from 0 to 1 to the information, considering the previous and current states. Selectively outputting relevant information from the current state allows the LSTM network to maintain useful, long-term dependencies to make predictions, both in current and future time-steps. </p><p>LSTM has wide applications in <a href="/wiki/Classification_in_machine_learning" class="mw-redirect" title="Classification in machine learning">classification</a>,<sup id="cite_ref-graves2006_5-0" class="reference"><a href="#cite_note-graves2006-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Data_processing" title="Data processing">data processing</a>, <a href="/wiki/Time_series" title="Time series">time series</a> analysis tasks,<sup id="cite_ref-wierstra2005_7-0" class="reference"><a href="#cite_note-wierstra2005-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Speech_recognition" title="Speech recognition">speech recognition</a>,<sup id="cite_ref-sak2014_8-0" class="reference"><a href="#cite_note-sak2014-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-liwu2015_9-0" class="reference"><a href="#cite_note-liwu2015-9"><span class="cite-bracket">&#91;</span>9<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Machine_translation" title="Machine translation">machine translation</a>,<sup id="cite_ref-GoogleTranslate_10-0" 
class="reference"><a href="#cite_note-GoogleTranslate-10"><span class="cite-bracket">&#91;</span>10<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-FacebookTranslate_11-0" class="reference"><a href="#cite_note-FacebookTranslate-11"><span class="cite-bracket">&#91;</span>11<span class="cite-bracket">&#93;</span></a></sup> speech activity detection,<sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span class="cite-bracket">&#91;</span>12<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Robot_control" title="Robot control">robot control</a>,<sup id="cite_ref-mayer2006_13-0" class="reference"><a href="#cite_note-mayer2006-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-OpenAIhand_14-0" class="reference"><a href="#cite_note-OpenAIhand-14"><span class="cite-bracket">&#91;</span>14<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Video_game" title="Video game">video games</a>,<sup id="cite_ref-OpenAIfive_15-0" class="reference"><a href="#cite_note-OpenAIfive-15"><span class="cite-bracket">&#91;</span>15<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-alphastar_16-0" class="reference"><a href="#cite_note-alphastar-16"><span class="cite-bracket">&#91;</span>16<span class="cite-bracket">&#93;</span></a></sup> and <a href="/wiki/Healthcare" class="mw-redirect" title="Healthcare">healthcare</a>.<sup id="cite_ref-decade2022_17-0" class="reference"><a href="#cite_note-decade2022-17"><span class="cite-bracket">&#91;</span>17<span class="cite-bracket">&#93;</span></a></sup> </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Motivation">Motivation</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=1" title="Edit section: Motivation"><span>edit</span></a><span 
class="mw-editsection-bracket">]</span></span></div> <p>In theory, classic <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">RNNs</a> can keep track of arbitrary long-term dependencies in the input sequences. The problem with classic RNNs is computational (or practical) in nature: when training a classic RNN using <a href="/wiki/Back-propagation" class="mw-redirect" title="Back-propagation">back-propagation</a>, the long-term gradients which are back-propagated can <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">"vanish"</a>, meaning they can tend to zero due to very small numbers creeping into the computations, causing the model to effectively stop learning. RNNs using LSTM units partially solve the <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a>, because LSTM units allow gradients to also flow with little to no attenuation. However, LSTM networks can still suffer from the exploding gradient problem.<sup id="cite_ref-calin2020_18-0" class="reference"><a href="#cite_note-calin2020-18"><span class="cite-bracket">&#91;</span>18<span class="cite-bracket">&#93;</span></a></sup> </p><p>The intuition behind the LSTM architecture is to create an additional module in a neural network that learns when to remember and when to forget pertinent information.<sup id="cite_ref-lstm2000_4-1" class="reference"><a href="#cite_note-lstm2000-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> In other words, the network effectively learns which information might be needed later on in a sequence and when that information is no longer needed. 
For instance, in the context of <a href="/wiki/Natural_language_processing" title="Natural language processing">natural language processing</a>, the network can learn grammatical dependencies.<sup id="cite_ref-LakretzKruszewskiDesbordes2019_19-0" class="reference"><a href="#cite_note-LakretzKruszewskiDesbordes2019-19"><span class="cite-bracket">&#91;</span>19<span class="cite-bracket">&#93;</span></a></sup> An LSTM might process the sentence "<u>Dave</u>, as a result of <u>his</u> controversial claims, <u>is</u> now a pariah" by remembering the (statistically likely) grammatical gender and number of the subject <i>Dave</i>, noting that this information is pertinent for the pronoun <i>his</i>, and noting that this information is no longer important after the verb <i>is</i>. </p> <div class="mw-heading mw-heading2"><h2 id="Variants">Variants</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=2" title="Edit section: Variants"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>In the equations below, the lowercase variables represent vectors. 
Matrices <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle W_{q}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>q</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle W_{q}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d16355ad959593cf720b24fffe62d99af53d15d9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:3.182ex; height:2.843ex;" alt="{\displaystyle W_{q}}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle U_{q}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>q</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle U_{q}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/05e27486afb11613504d6d6b9f6bd72e322607c8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:2.576ex; height:2.843ex;" alt="{\displaystyle U_{q}}" /></span> contain, respectively, the weights of the input and recurrent connections, where the subscript <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle _{q}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>q</mi> </mrow> 
</msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle _{q}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e333a711146f91533eb5a030accc0e90948e4f92" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:0.989ex; height:1.676ex;" alt="{\displaystyle _{q}}" /></span> can either be the input gate <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}" /></span>, output gate <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle o}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>o</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle o}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0c1031f61947aa3d1cf3a70ec3e4904df2c3675d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.128ex; height:1.676ex;" alt="{\displaystyle o}" /></span>, the forget gate <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math 
xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}" /></span> or the memory cell <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86a67b81c2de995bd608d5b2df50cd8cd7d92455" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.007ex; height:1.676ex;" alt="{\displaystyle c}" /></span>, depending on the activation being calculated. In this section, we are thus using a "vector notation". 
So, for example, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}\in \mathbb {R} ^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}\in \mathbb {R} ^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65d6f2af820422ed59a0f14af91eee7498ebc4a2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:7.531ex; height:3.009ex;" alt="{\displaystyle c_{t}\in \mathbb {R} ^{h}}" /></span> is not just one unit of one LSTM cell, but contains <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle h}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>h</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle h}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b26be3e694314bc90c3215047e4a2010c6ee184a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.339ex; height:2.176ex;" alt="{\displaystyle h}" /></span> LSTM cell units. 
</p><p>See <sup id="cite_ref-ASearchSpaceOdyssey_20-0" class="reference"><a href="#cite_note-ASearchSpaceOdyssey-20"><span class="cite-bracket">&#91;</span>20<span class="cite-bracket">&#93;</span></a></sup> for an empirical study of 8 architectural variants of LSTM. </p> <div class="mw-heading mw-heading3"><h3 id="LSTM_with_a_forget_gate">LSTM with a forget gate</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=3" title="Edit section: LSTM with a forget gate"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The compact forms of the equations for the forward pass of an LSTM cell with a forget gate are:<sup id="cite_ref-lstm1997_1-2" class="reference"><a href="#cite_note-lstm1997-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-lstm2000_4-2" class="reference"><a href="#cite_note-lstm2000-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}h_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}h_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}h_{t-1}+b_{o})\\{\tilde {c}}_{t}&amp;=\tanh _{c}(W_{c}x_{t}+U_{c}h_{t-1}+b_{c})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot {\tilde {c}}_{t}\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <msub> <mi>f</mi> <mrow 
class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> 
</mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mover> <mi>c</mi> <mo stretchy="false">&#x7e;<!-- ~ --></mo> </mover> </mrow> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>tanh</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo>&#x2061;<!-- ⁡ --></mo> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mover> <mi>c</mi> <mo stretchy="false">&#x7e;<!-- ~ --></mo> </mover> </mrow> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> </mtr> <mtr> <mtd> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> 
<mi>h</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}h_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}h_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}h_{t-1}+b_{o})\\{\tilde {c}}_{t}&amp;=\tanh _{c}(W_{c}x_{t}+U_{c}h_{t-1}+b_{c})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot {\tilde {c}}_{t}\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f1693e4ebd58b8060c67437774d3b2aec8921743" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -8.838ex; width:33.134ex; height:18.843ex;" alt="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}h_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}h_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}h_{t-1}+b_{o})\\{\tilde {c}}_{t}&amp;=\tanh _{c}(W_{c}x_{t}+U_{c}h_{t-1}+b_{c})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot {\tilde {c}}_{t}\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}" /></span></dd></dl> <p>where the initial values are <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{0}=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{0}=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/29af3d4e887815bb3b9b9eab4f7540a376fccd73" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.322ex; height:2.509ex;" alt="{\displaystyle c_{0}=0}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle h_{0}=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mn>0</mn> </mrow> </msub> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle h_{0}=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/14a294b6cf9cbde4c37efd966913a63d316e615c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:6.654ex; height:2.509ex;" alt="{\displaystyle h_{0}=0}" /></span> and the operator <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \odot }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>&#x2299;<!-- ⊙ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \odot }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e89e009eb8a8839c82aa5c76c15e9f2d67006276" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:1.808ex; height:2.176ex;" alt="{\displaystyle \odot }" /></span> denotes the <a href="/wiki/Hadamard_product_(matrices)" title="Hadamard product (matrices)">Hadamard product</a> (element-wise product). 
The subscript <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65658b7b223af9e1acc877d848888ecdb4466560" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.84ex; height:2.009ex;" alt="{\displaystyle t}" /></span> indexes the time step. </p> <div class="mw-heading mw-heading4"><h4 id="Variables">Variables</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=4" title="Edit section: Variables"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Letting the superscripts <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle d}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>d</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle d}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e85ff03cbe0c7341af6b982e47e9f90d235c66ab" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.216ex; height:2.176ex;" alt="{\displaystyle d}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle h}"> <semantics> 
<mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>h</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle h}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b26be3e694314bc90c3215047e4a2010c6ee184a" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.339ex; height:2.176ex;" alt="{\displaystyle h}" /></span> refer to the number of input features and number of hidden units, respectively: </p> <ul><li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle x_{t}\in \mathbb {R} ^{d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle x_{t}\in \mathbb {R} ^{d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/d528d57c5517e90795e0a6d6760463564236fee7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:7.766ex; height:3.009ex;" alt="{\displaystyle x_{t}\in \mathbb {R} ^{d}}" /></span>: input vector to the LSTM unit</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}\in {(0,1)}^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> 
<mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">(</mo> <mn>0</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}\in {(0,1)}^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6801986d2383af655b5ba1ca1c48b535b4f60bbf" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.153ex; height:3.343ex;" alt="{\displaystyle f_{t}\in {(0,1)}^{h}}" /></span>: forget gate's activation vector</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i_{t}\in {(0,1)}^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">(</mo> <mn>0</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i_{t}\in {(0,1)}^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/e8cb1a28fcd328d06438c80be6308b6a3ccdc5d7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:10.816ex; height:3.343ex;" alt="{\displaystyle i_{t}\in {(0,1)}^{h}}" /></span>: input/update gate's activation vector</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle o_{t}\in {(0,1)}^{h}}"> 
<semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">(</mo> <mn>0</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle o_{t}\in {(0,1)}^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/f3961562e04e80ad79e482894135770237ce7a59" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:11.141ex; height:3.343ex;" alt="{\displaystyle o_{t}\in {(0,1)}^{h}}" /></span>: output gate's activation vector</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle h_{t}\in {(-1,1)}^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">(</mo> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle h_{t}\in {(-1,1)}^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ca986de16d69ffdecaa766c76306d30c406f57fa" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:13.161ex; height:3.343ex;" alt="{\displaystyle h_{t}\in {(-1,1)}^{h}}" /></span>: hidden state vector, also known as the output vector of the LSTM 
unit</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\tilde {c}}_{t}\in {(-1,1)}^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mrow class="MJX-TeXAtom-ORD"> <mrow class="MJX-TeXAtom-ORD"> <mover> <mi>c</mi> <mo stretchy="false">&#x7e;<!-- ~ --></mo> </mover> </mrow> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mo stretchy="false">(</mo> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> <mo>,</mo> <mn>1</mn> <mo stretchy="false">)</mo> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\tilde {c}}_{t}\in {(-1,1)}^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/df821ed31ee4133fc9a3a272008dfcb43279c204" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:13.114ex; height:3.343ex;" alt="{\displaystyle {\tilde {c}}_{t}\in {(-1,1)}^{h}}" /></span>: cell input activation vector</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}\in \mathbb {R} ^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}\in \mathbb {R} ^{h}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65d6f2af820422ed59a0f14af91eee7498ebc4a2" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:7.531ex; height:3.009ex;" alt="{\displaystyle c_{t}\in \mathbb {R} ^{h}}" /></span>: cell state vector</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle W\in \mathbb {R} ^{h\times d}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>W</mi> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> <mo>&#xd7;<!-- × --></mo> <mi>d</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle W\in \mathbb {R} ^{h\times d}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/925afdb42f13f1d912db87ecda65135eb9fe6352" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:10.271ex; height:2.676ex;" alt="{\displaystyle W\in \mathbb {R} ^{h\times d}}" /></span>, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle U\in \mathbb {R} ^{h\times h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>U</mi> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> <mo>&#xd7;<!-- × --></mo> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle U\in \mathbb {R} ^{h\times h}}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0ff9bb53a5409a1e51f6130b3dfbcdad63324880" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:9.706ex; height:2.676ex;" alt="{\displaystyle U\in \mathbb {R} ^{h\times h}}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle b\in \mathbb {R} ^{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>b</mi> <mo>&#x2208;<!-- ∈ --></mo> <msup> <mrow class="MJX-TeXAtom-ORD"> <mi mathvariant="double-struck">R</mi> </mrow> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msup> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle b\in \mathbb {R} ^{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/289515b23d7df7e2e09f2ee38951abf345e60080" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:6.695ex; height:2.676ex;" alt="{\displaystyle b\in \mathbb {R} ^{h}}" /></span>: weight matrices and bias vector parameters which need to be learned during training</li></ul> <div class="mw-heading mw-heading4"><h4 id="Activation_functions"><a href="/wiki/Activation_function" title="Activation function">Activation functions</a></h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=5" title="Edit section: Activation functions"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sigma _{g}}"> <semantics> <mrow 
class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sigma _{g}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/086f92de077f853afd7f5d22fb3d305cbf5e0ac3" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.005ex; width:2.349ex; height:2.343ex;" alt="{\displaystyle \sigma _{g}}" /></span>: <a href="/wiki/Sigmoid_function" title="Sigmoid function">sigmoid function</a>.</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sigma _{c}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sigma _{c}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/b436b43abda74fce1a6859e03d34c914c6a240f4" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.272ex; height:2.009ex;" alt="{\displaystyle \sigma _{c}}" /></span>: <a href="/wiki/Hyperbolic_tangent" class="mw-redirect" title="Hyperbolic tangent">hyperbolic tangent</a> function.</li> <li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sigma _{h}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msub> </mstyle> 
</mrow> <annotation encoding="application/x-tex">{\displaystyle \sigma _{h}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8a7f19495f5a65d26570b54b7ca332956d27b27b" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.506ex; height:2.009ex;" alt="{\displaystyle \sigma _{h}}" /></span>: hyperbolic tangent function or, as the peephole LSTM paper<sup id="cite_ref-peepholeLSTM_21-0" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-peephole2002_22-0" class="reference"><a href="#cite_note-peephole2002-22"><span class="cite-bracket">&#91;</span>22<span class="cite-bracket">&#93;</span></a></sup> suggests, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \sigma _{h}(x)=x}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msub> <mo stretchy="false">(</mo> <mi>x</mi> <mo stretchy="false">)</mo> <mo>=</mo> <mi>x</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \sigma _{h}(x)=x}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/98d34299f4e04f10f7c22e1219bf182712c3e0fc" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.838ex; width:10.074ex; height:2.843ex;" alt="{\displaystyle \sigma _{h}(x)=x}" /></span>.</li></ul> <div class="mw-heading mw-heading3"><h3 id="Peephole_LSTM">Peephole LSTM</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=6" title="Edit section: Peephole LSTM"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure typeof="mw:File/Thumb"><a href="/wiki/File:Peephole_Long_Short-Term_Memory.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/5/53/Peephole_Long_Short-Term_Memory.svg/300px-Peephole_Long_Short-Term_Memory.svg.png" decoding="async" width="300" height="165" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/5/53/Peephole_Long_Short-Term_Memory.svg/450px-Peephole_Long_Short-Term_Memory.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/5/53/Peephole_Long_Short-Term_Memory.svg/600px-Peephole_Long_Short-Term_Memory.svg.png 2x" data-file-width="542" data-file-height="298" /></a><figcaption>A peephole LSTM unit with input (i.e. <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/add78d8608ad86e54951b8c8bd6c8d8416533d20" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.802ex; height:2.176ex;" alt="{\displaystyle i}" /></span>), output (i.e. 
<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle o}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>o</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle o}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0c1031f61947aa3d1cf3a70ec3e4904df2c3675d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.128ex; height:1.676ex;" alt="{\displaystyle o}" /></span>), and forget (i.e. <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}" /></span>) gates</figcaption></figure> <p>The figure on the right is a graphical representation of an LSTM unit with peephole connections (i.e. 
a peephole LSTM).<sup id="cite_ref-peepholeLSTM_21-1" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-peephole2002_22-1" class="reference"><a href="#cite_note-peephole2002-22"><span class="cite-bracket">&#91;</span>22<span class="cite-bracket">&#93;</span></a></sup> Peephole connections allow the gates to access the constant error carousel (CEC), whose activation is the cell state.<sup id="cite_ref-peepholeLSTM_21-2" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup> <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle h_{t-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle h_{t-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cf56fc7e1114417475762546403f3d66460975d0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:4.265ex; height:2.509ex;" alt="{\displaystyle h_{t-1}}" /></span> is not used, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle 
c_{t-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b5dbc0177993c2ebd927aee23d88bd263770532" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:3.933ex; height:2.009ex;" alt="{\displaystyle c_{t-1}}" /></span> is used instead in most places. </p> <dl><dd><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}c_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}c_{t-1}+b_{o})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma _{c}(W_{c}x_{t}+b_{c})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> 
</msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> 
<mi>c</mi> </mrow> </msub> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}c_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}c_{t-1}+b_{o})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma _{c}(W_{c}x_{t}+b_{c})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/fc535506b930c709edeefd4d6f24e45d0f2249ae" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -7.338ex; width:36.22ex; height:15.843ex;" alt="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}x_{t}+U_{f}c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}x_{t}+U_{i}c_{t-1}+b_{i})\\o_{t}&amp;=\sigma _{g}(W_{o}x_{t}+U_{o}c_{t-1}+b_{o})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma _{c}(W_{c}x_{t}+b_{c})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}" /></span></dd></dl> <p>Each of the gates can be thought of as a "standard" neuron in a feed-forward (or multi-layer) neural network: that is, they compute an activation (using an activation function) of a weighted sum. 
<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i_{t},o_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i_{t},o_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cf0b0dee7b12fd921a114101ff11c83e1606a1f8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:4.616ex; height:2.509ex;" alt="{\displaystyle i_{t},o_{t}}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/874c306411e808e8191e8aeb95e3440e1c68d6e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.509ex;" alt="{\displaystyle f_{t}}" /></span> represent the activations of respectively the input, output and forget gates, at time step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle 
displaystyle="true" scriptlevel="0"> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65658b7b223af9e1acc877d848888ecdb4466560" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.84ex; height:2.009ex;" alt="{\displaystyle t}" /></span>. </p><p>The 3 exit arrows from the memory cell <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86a67b81c2de995bd608d5b2df50cd8cd7d92455" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.007ex; height:1.676ex;" alt="{\displaystyle c}" /></span> to the 3 gates <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i,o}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> <mo>,</mo> <mi>o</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i,o}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4697b39f565cd54942b9f81d5de46dcdd1174528" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.964ex; height:2.509ex;" alt="{\displaystyle i,o}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline 
mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}" /></span> represent the <i>peephole</i> connections. These peephole connections actually denote the contributions of the activation of the memory cell <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86a67b81c2de995bd608d5b2df50cd8cd7d92455" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.007ex; height:1.676ex;" alt="{\displaystyle c}" /></span> at time step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t-1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t-1}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a215d9553945bb84b3b5a79cc796fb7d6e0629f0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:4.842ex; height:2.343ex;" alt="{\displaystyle t-1}" /></span>, i.e. the contribution of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b5dbc0177993c2ebd927aee23d88bd263770532" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:3.933ex; height:2.009ex;" alt="{\displaystyle c_{t-1}}" /></span> (and not <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/93578e37f3234419a34df79845836bc0ec5ef76c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.833ex; height:2.009ex;" alt="{\displaystyle c_{t}}" /></span>, as the picture may suggest). 
In other words, the gates <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i,o}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>i</mi> <mo>,</mo> <mi>o</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i,o}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4697b39f565cd54942b9f81d5de46dcdd1174528" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:2.964ex; height:2.509ex;" alt="{\displaystyle i,o}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>f</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/132e57acb643253e7810ee9702d9581f159a1c61" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.279ex; height:2.509ex;" alt="{\displaystyle f}" /></span> calculate their activations at time step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/65658b7b223af9e1acc877d848888ecdb4466560" 
class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:0.84ex; height:2.009ex;" alt="{\displaystyle t}" /></span> (i.e., respectively, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle i_{t},o_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>,</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle i_{t},o_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/cf0b0dee7b12fd921a114101ff11c83e1606a1f8" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:4.616ex; height:2.509ex;" alt="{\displaystyle i_{t},o_{t}}" /></span> and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle f_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle f_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/874c306411e808e8191e8aeb95e3440e1c68d6e9" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.965ex; height:2.509ex;" alt="{\displaystyle f_{t}}" /></span>) also considering the activation of the memory cell <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" 
style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>c</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/86a67b81c2de995bd608d5b2df50cd8cd7d92455" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:1.007ex; height:1.676ex;" alt="{\displaystyle c}" /></span> at time step <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle t-1}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle t-1}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/a215d9553945bb84b3b5a79cc796fb7d6e0629f0" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.505ex; width:4.842ex; height:2.343ex;" alt="{\displaystyle t-1}" /></span>, i.e. 
<span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t-1}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t-1}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0b5dbc0177993c2ebd927aee23d88bd263770532" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:3.933ex; height:2.009ex;" alt="{\displaystyle c_{t-1}}" /></span>. </p><p>The single left-to-right arrow exiting the memory cell is <i>not</i> a peephole connection and denotes <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle c_{t}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle c_{t}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/93578e37f3234419a34df79845836bc0ec5ef76c" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.671ex; width:1.833ex; height:2.009ex;" alt="{\displaystyle c_{t}}" /></span>. 
</p><p>The little circles containing a <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \times }"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>&#xd7;<!-- × --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \times }</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/0ffafff1ad26cbe49045f19a67ce532116a32703" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: 0.019ex; margin-bottom: -0.19ex; width:1.808ex; height:1.509ex;" alt="{\displaystyle \times }" /></span> symbol represent an element-wise multiplication between their inputs. The big circles containing an <i>S</i>-like curve represent the application of a differentiable function (like the sigmoid function) to a weighted sum. 
</p> <div class="mw-heading mw-heading3"><h3 id="Peephole_convolutional_LSTM">Peephole convolutional LSTM</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=7" title="Edit section: Peephole convolutional LSTM"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Peephole <a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">convolutional</a> LSTM.<sup id="cite_ref-shi2015_23-0" class="reference"><a href="#cite_note-shi2015-23"><span class="cite-bracket">&#91;</span>23<span class="cite-bracket">&#93;</span></a></sup> The <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle *}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mo>&#x2217;<!-- ∗ --></mo> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle *}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8e9972f426d9e07855984f73ee195a21dbc21755" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: 0.079ex; margin-bottom: -0.25ex; width:1.162ex; height:1.509ex;" alt="{\displaystyle *}" /></span> denotes the <a href="/wiki/Convolution" title="Convolution">convolution</a> operator. 
</p> <dl><dd><span class="mwe-math-element" id="Page 4, formula 4 in [33] reference (Ot is calculated for &#39;&#39;C&#39;&#39;(&#39;&#39;t&#39;&#39;) intead of &#39;&#39;C&#39;&#39;(&#39;&#39;t&#39;&#39;&#160;−&#160;1)): https://arxiv.org/abs/1506.04214v2"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}*x_{t}+U_{f}*h_{t-1}+V_{f}\odot c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}*x_{t}+U_{i}*h_{t-1}+V_{i}\odot c_{t-1}+b_{i})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma _{c}(W_{c}*x_{t}+U_{c}*h_{t-1}+b_{c})\\o_{t}&amp;=\sigma _{g}(W_{o}*x_{t}+U_{o}*h_{t-1}+V_{o}\odot c_{t}+b_{o})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mrow class="MJX-TeXAtom-ORD"> <mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"> <mtr> <mtd> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>V</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> 
<msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>f</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>V</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>i</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>f</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>i</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> 
<msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>c</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>g</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>x</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>U</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo>&#x2217;<!-- ∗ --></mo> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> <mo>&#x2212;<!-- − --></mo> <mn>1</mn> </mrow> </msub> <mo>+</mo> <msub> <mi>V</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>+</mo> <msub> <mi>b</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>o</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> <mtr> <mtd> <msub> <mi>h</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> </mtd> <mtd> <mi></mi> <mo>=</mo> <msub> <mi>o</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo>&#x2299;<!-- ⊙ --></mo> <msub> <mi>&#x3c3;<!-- σ --></mi> <mrow class="MJX-TeXAtom-ORD"> <mi>h</mi> </mrow> </msub> <mo stretchy="false">(</mo> <msub> <mi>c</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>t</mi> </mrow> </msub> <mo stretchy="false">)</mo> </mtd> </mtr> </mtable> </mrow> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}*x_{t}+U_{f}*h_{t-1}+V_{f}\odot c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}*x_{t}+U_{i}*h_{t-1}+V_{i}\odot c_{t-1}+b_{i})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma 
_{c}(W_{c}*x_{t}+U_{c}*h_{t-1}+b_{c})\\o_{t}&amp;=\sigma _{g}(W_{o}*x_{t}+U_{o}*h_{t-1}+V_{o}\odot c_{t}+b_{o})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/84eefaa4a824754340378effe198b298b11cb5f5" class="mwe-math-fallback-image-inline mw-invert skin-invert" id="Page_4,_formula_4_in_[33]_reference_(Ot_is_calculated_for_&#39;&#39;C&#39;&#39;(&#39;&#39;t&#39;&#39;)_intead_of_&#39;&#39;C&#39;&#39;(&#39;&#39;t&#39;&#39;&#160;−&#160;1)):_https://arxiv.org/abs/1506.04214v2" aria-hidden="true" style="vertical-align: -7.338ex; width:50.247ex; height:15.843ex;" alt="{\displaystyle {\begin{aligned}f_{t}&amp;=\sigma _{g}(W_{f}*x_{t}+U_{f}*h_{t-1}+V_{f}\odot c_{t-1}+b_{f})\\i_{t}&amp;=\sigma _{g}(W_{i}*x_{t}+U_{i}*h_{t-1}+V_{i}\odot c_{t-1}+b_{i})\\c_{t}&amp;=f_{t}\odot c_{t-1}+i_{t}\odot \sigma _{c}(W_{c}*x_{t}+U_{c}*h_{t-1}+b_{c})\\o_{t}&amp;=\sigma _{g}(W_{o}*x_{t}+U_{o}*h_{t-1}+V_{o}\odot c_{t}+b_{o})\\h_{t}&amp;=o_{t}\odot \sigma _{h}(c_{t})\end{aligned}}}" /></span></dd></dl> <div class="mw-heading mw-heading2"><h2 id="Training">Training</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=8" title="Edit section: Training"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>An RNN using LSTM units can be trained in a supervised fashion on a set of training sequences, using an optimization algorithm like <a href="/wiki/Gradient_descent" title="Gradient descent">gradient descent</a> combined with <a href="/wiki/Backpropagation_through_time" title="Backpropagation through time">backpropagation through time</a> to compute the gradients needed during the optimization process, in order to change each weight of the LSTM network in proportion to the derivative of the error (at the output layer of the LSTM network) with respect to 
the corresponding weight. </p><p>A problem with using <a href="/wiki/Gradient_descent" title="Gradient descent">gradient descent</a> for standard RNNs is that error gradients <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanish</a> exponentially quickly with the size of the time lag between important events. This is due to <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle \lim _{n\to \infty }W^{n}=0}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <munder> <mo movablelimits="true" form="prefix">lim</mo> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> <mo stretchy="false">&#x2192;<!-- → --></mo> <mi mathvariant="normal">&#x221e;<!-- ∞ --></mi> </mrow> </munder> <msup> <mi>W</mi> <mrow class="MJX-TeXAtom-ORD"> <mi>n</mi> </mrow> </msup> <mo>=</mo> <mn>0</mn> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle \lim _{n\to \infty }W^{n}=0}</annotation> </semantics> </math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/4f21d24f36ac54c2e3826fe618891ce17b19e12d" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -1.838ex; width:12.647ex; height:3.843ex;" alt="{\displaystyle \lim _{n\to \infty }W^{n}=0}" /></span> if the <a href="/wiki/Spectral_radius" title="Spectral radius">spectral radius</a> of <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle W}"> <semantics> <mrow class="MJX-TeXAtom-ORD"> <mstyle displaystyle="true" scriptlevel="0"> <mi>W</mi> </mstyle> </mrow> <annotation encoding="application/x-tex">{\displaystyle W}</annotation> </semantics> </math></span><img 
src="https://wikimedia.org/api/rest_v1/media/math/render/svg/54a9c4c547f4d6111f81946cad242b18298d70b7" class="mwe-math-fallback-image-inline mw-invert skin-invert" aria-hidden="true" style="vertical-align: -0.338ex; width:2.435ex; height:2.176ex;" alt="{\displaystyle W}" /></span> is smaller than 1.<sup id="cite_ref-hochreiter1991_2-1" class="reference"><a href="#cite_note-hochreiter1991-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-gradf_24-0" class="reference"><a href="#cite_note-gradf-24"><span class="cite-bracket">&#91;</span>24<span class="cite-bracket">&#93;</span></a></sup> </p><p>However, with LSTM units, when error values are back-propagated from the output layer, the error remains in the LSTM unit's cell. This "error carousel" continuously feeds error back to each of the LSTM unit's gates, until they learn to cut off the value. </p> <div class="mw-heading mw-heading3"><h3 id="CTC_score_function">CTC score function</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=9" title="Edit section: CTC score function"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Many applications use stacks of LSTM RNNs<sup id="cite_ref-fernandez2007ijcai_25-0" class="reference"><a href="#cite_note-fernandez2007ijcai-25"><span class="cite-bracket">&#91;</span>25<span class="cite-bracket">&#93;</span></a></sup> and train them by <a href="/wiki/Connectionist_temporal_classification_(CTC)" class="mw-redirect" title="Connectionist temporal classification (CTC)">connectionist temporal classification (CTC)</a><sup id="cite_ref-graves2006_5-1" class="reference"><a href="#cite_note-graves2006-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup> to find an RNN weight matrix that maximizes the probability of the label sequences in a training set, 
given the corresponding input sequences. CTC achieves both alignment and recognition. </p> <div class="mw-heading mw-heading3"><h3 id="Alternatives">Alternatives</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=10" title="Edit section: Alternatives"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Sometimes, it can be advantageous to train (parts of) an LSTM by <a href="/wiki/Neuroevolution" title="Neuroevolution">neuroevolution</a><sup id="cite_ref-wierstra2005_7-1" class="reference"><a href="#cite_note-wierstra2005-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup> or by policy gradient methods, especially when there is no "teacher" (that is, training labels). </p> <div class="mw-heading mw-heading2"><h2 id="Applications">Applications</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=11" title="Edit section: Applications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Applications of LSTM include: </p> <style data-mw-deduplicate="TemplateStyles:r1184024115">.mw-parser-output .div-col{margin-top:0.3em;column-width:30em}.mw-parser-output .div-col-small{font-size:90%}.mw-parser-output .div-col-rules{column-rule:1px solid #aaa}.mw-parser-output .div-col dl,.mw-parser-output .div-col ol,.mw-parser-output .div-col ul{margin-top:0}.mw-parser-output .div-col li,.mw-parser-output .div-col dd{page-break-inside:avoid;break-inside:avoid-column}</style><div class="div-col" style="column-width: 25em;"> <ul><li><a href="/wiki/Robot_control" title="Robot control">Robot control</a><sup id="cite_ref-mayer2006_13-1" class="reference"><a href="#cite_note-mayer2006-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup></li> 
<li><a href="/wiki/Time_series_prediction" class="mw-redirect" title="Time series prediction">Time series prediction</a><sup id="cite_ref-wierstra2005_7-2" class="reference"><a href="#cite_note-wierstra2005-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a><sup id="cite_ref-graves2005_26-0" class="reference"><a href="#cite_note-graves2005-26"><span class="cite-bracket">&#91;</span>26<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-fernandez2007icann_27-0" class="reference"><a href="#cite_note-fernandez2007icann-27"><span class="cite-bracket">&#91;</span>27<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-graves2013_28-0" class="reference"><a href="#cite_note-graves2013-28"><span class="cite-bracket">&#91;</span>28<span class="cite-bracket">&#93;</span></a></sup></li> <li>Rhythm learning<sup id="cite_ref-peephole2002_22-2" class="reference"><a href="#cite_note-peephole2002-22"><span class="cite-bracket">&#91;</span>22<span class="cite-bracket">&#93;</span></a></sup></li> <li>Hydrological rainfall–runoff modeling<sup id="cite_ref-29" class="reference"><a href="#cite_note-29"><span class="cite-bracket">&#91;</span>29<span class="cite-bracket">&#93;</span></a></sup></li> <li>Music composition<sup id="cite_ref-eck2002_30-0" class="reference"><a href="#cite_note-eck2002-30"><span class="cite-bracket">&#91;</span>30<span class="cite-bracket">&#93;</span></a></sup></li> <li>Grammar learning<sup id="cite_ref-gers2002_31-0" class="reference"><a href="#cite_note-gers2002-31"><span class="cite-bracket">&#91;</span>31<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-peepholeLSTM_21-3" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-perez2003_32-0" class="reference"><a 
href="#cite_note-perez2003-32"><span class="cite-bracket">&#91;</span>32<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">Handwriting recognition</a><sup id="cite_ref-graves2009nips_33-0" class="reference"><a href="#cite_note-graves2009nips-33"><span class="cite-bracket">&#91;</span>33<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-34" class="reference"><a href="#cite_note-34"><span class="cite-bracket">&#91;</span>34<span class="cite-bracket">&#93;</span></a></sup></li> <li>Human action recognition<sup id="cite_ref-baccouche2011_35-0" class="reference"><a href="#cite_note-baccouche2011-35"><span class="cite-bracket">&#91;</span>35<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Sign_language" title="Sign language">Sign language translation</a><sup id="cite_ref-huang2018_36-0" class="reference"><a href="#cite_note-huang2018-36"><span class="cite-bracket">&#91;</span>36<span class="cite-bracket">&#93;</span></a></sup></li> <li>Protein homology detection<sup id="cite_ref-hochreiter2007_37-0" class="reference"><a href="#cite_note-hochreiter2007-37"><span class="cite-bracket">&#91;</span>37<span class="cite-bracket">&#93;</span></a></sup></li> <li>Predicting subcellular localization of proteins<sup id="cite_ref-thireou2007_38-0" class="reference"><a href="#cite_note-thireou2007-38"><span class="cite-bracket">&#91;</span>38<span class="cite-bracket">&#93;</span></a></sup></li> <li>Time series <a href="/wiki/Anomaly_detection" title="Anomaly detection">anomaly detection</a><sup id="cite_ref-malhotra2015_39-0" class="reference"><a href="#cite_note-malhotra2015-39"><span class="cite-bracket">&#91;</span>39<span class="cite-bracket">&#93;</span></a></sup></li> <li>Several prediction tasks in the area of <a href="/wiki/Business_process_management" title="Business process management">business process management</a><sup id="cite_ref-tax2017_40-0" 
class="reference"><a href="#cite_note-tax2017-40"><span class="cite-bracket">&#91;</span>40<span class="cite-bracket">&#93;</span></a></sup></li> <li>Prediction in medical care pathways<sup id="cite_ref-choi2016_41-0" class="reference"><a href="#cite_note-choi2016-41"><span class="cite-bracket">&#91;</span>41<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Semantic_parsing" title="Semantic parsing">Semantic parsing</a><sup id="cite_ref-jia2016_42-0" class="reference"><a href="#cite_note-jia2016-42"><span class="cite-bracket">&#91;</span>42<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Object_co-segmentation" title="Object co-segmentation">Object co-segmentation</a><sup id="cite_ref-Wang_Duan_Zhang_Niu_p=1657_43-0" class="reference"><a href="#cite_note-Wang_Duan_Zhang_Niu_p=1657-43"><span class="cite-bracket">&#91;</span>43<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-Duan_Wang_Zhai_Zheng_2018_p._44-0" class="reference"><a href="#cite_note-Duan_Wang_Zhai_Zheng_2018_p.-44"><span class="cite-bracket">&#91;</span>44<span class="cite-bracket">&#93;</span></a></sup></li> <li>Airport passenger management<sup id="cite_ref-orsini2019_45-0" class="reference"><a href="#cite_note-orsini2019-45"><span class="cite-bracket">&#91;</span>45<span class="cite-bracket">&#93;</span></a></sup></li> <li>Short-term <a href="/wiki/Traffic_forecast" class="mw-redirect" title="Traffic forecast">traffic forecast</a><sup id="cite_ref-liu2017_46-0" class="reference"><a href="#cite_note-liu2017-46"><span class="cite-bracket">&#91;</span>46<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Drug_design" title="Drug design">Drug design</a><sup id="cite_ref-pmid29095571_47-0" class="reference"><a href="#cite_note-pmid29095571-47"><span class="cite-bracket">&#91;</span>47<span class="cite-bracket">&#93;</span></a></sup></li> <li>Market Prediction<sup id="cite_ref-saiful2020_48-0" class="reference"><a 
href="#cite_note-saiful2020-48"><span class="cite-bracket">&#91;</span>48<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/w/index.php?title=Activity_Classification_in_Video&amp;action=edit&amp;redlink=1" class="new" title="Activity Classification in Video (page does not exist)">Activity Classification in Video</a><sup id="cite_ref-renamed_from_2023_on_20240120110022_49-0" class="reference"><a href="#cite_note-renamed_from_2023_on_20240120110022-49"><span class="cite-bracket">&#91;</span>49<span class="cite-bracket">&#93;</span></a></sup></li></ul> </div><p><b>2015:</b> Google started using an LSTM trained by CTC for speech recognition on Google Voice.<sup id="cite_ref-Beau15_50-0" class="reference"><a href="#cite_note-Beau15-50"><span class="cite-bracket">&#91;</span>50<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-GoogleVoiceSearch_51-0" class="reference"><a href="#cite_note-GoogleVoiceSearch-51"><span class="cite-bracket">&#91;</span>51<span class="cite-bracket">&#93;</span></a></sup> According to the official blog post, the new model cut transcription errors by 49%.<sup id="cite_ref-googleblog2015_52-0" class="reference"><a href="#cite_note-googleblog2015-52"><span class="cite-bracket">&#91;</span>52<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2016:</b> Google started using an LSTM to suggest messages in the Allo conversation app.<sup id="cite_ref-GoogleAllo_53-0" class="reference"><a href="#cite_note-GoogleAllo-53"><span class="cite-bracket">&#91;</span>53<span class="cite-bracket">&#93;</span></a></sup> In the same year, Google released the <a href="/wiki/Google_Neural_Machine_Translation" title="Google Neural Machine Translation">Google Neural Machine Translation</a> system for Google Translate which used LSTMs to reduce translation errors by 60%.<sup id="cite_ref-GoogleTranslate_10-1" class="reference"><a href="#cite_note-GoogleTranslate-10"><span class="cite-bracket">&#91;</span>10<span 
class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-WiredGoogleTranslate_54-0" class="reference"><a href="#cite_note-WiredGoogleTranslate-54"><span class="cite-bracket">&#91;</span>54<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-googleblog2016_55-0" class="reference"><a href="#cite_note-googleblog2016-55"><span class="cite-bracket">&#91;</span>55<span class="cite-bracket">&#93;</span></a></sup> </p><p>Apple announced at its <a href="/wiki/Worldwide_Developers_Conference" title="Worldwide Developers Conference">Worldwide Developers Conference</a> that it would start using the LSTM for QuickType<sup id="cite_ref-AppleQuicktype_56-0" class="reference"><a href="#cite_note-AppleQuicktype-56"><span class="cite-bracket">&#91;</span>56<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-AppleQuicktype2_57-0" class="reference"><a href="#cite_note-AppleQuicktype2-57"><span class="cite-bracket">&#91;</span>57<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-58" class="reference"><a href="#cite_note-58"><span class="cite-bracket">&#91;</span>58<span class="cite-bracket">&#93;</span></a></sup> in the iPhone and for Siri.<sup id="cite_ref-AppleSiri_59-0" class="reference"><a href="#cite_note-AppleSiri-59"><span class="cite-bracket">&#91;</span>59<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-capes2017_60-0" class="reference"><a href="#cite_note-capes2017-60"><span class="cite-bracket">&#91;</span>60<span class="cite-bracket">&#93;</span></a></sup> </p><p>Amazon released <a href="/wiki/Amazon_Polly" title="Amazon Polly">Polly</a>, which generates the voices behind Alexa, using a bidirectional LSTM for the text-to-speech technology.<sup id="cite_ref-AmazonAlexa_61-0" class="reference"><a href="#cite_note-AmazonAlexa-61"><span class="cite-bracket">&#91;</span>61<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2017:</b> Facebook performed some 4.5 billion automatic translations every day 
using long short-term memory networks.<sup id="cite_ref-FacebookTranslate_11-1" class="reference"><a href="#cite_note-FacebookTranslate-11"><span class="cite-bracket">&#91;</span>11<span class="cite-bracket">&#93;</span></a></sup> </p><p>Microsoft reported reaching 94.9% recognition accuracy on the <a href="/w/index.php?title=Switchboard_corpus&amp;action=edit&amp;redlink=1" class="new" title="Switchboard corpus (page does not exist)">Switchboard corpus</a>, incorporating a vocabulary of 165,000 words. The approach used "dialog session-based long-short-term memory".<sup id="cite_ref-62" class="reference"><a href="#cite_note-62"><span class="cite-bracket">&#91;</span>62<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2018:</b> <a href="/wiki/OpenAI" title="OpenAI">OpenAI</a> used LSTM trained by policy gradients to beat humans in the complex video game of Dota 2,<sup id="cite_ref-OpenAIfive_15-1" class="reference"><a href="#cite_note-OpenAIfive-15"><span class="cite-bracket">&#91;</span>15<span class="cite-bracket">&#93;</span></a></sup> and to control a human-like robot hand that manipulates physical objects with unprecedented dexterity.<sup id="cite_ref-OpenAIhand_14-1" class="reference"><a href="#cite_note-OpenAIhand-14"><span class="cite-bracket">&#91;</span>14<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-miraculous2021_63-0" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2019:</b> <a href="/wiki/DeepMind" class="mw-redirect" title="DeepMind">DeepMind</a> used LSTM trained by policy gradients to excel at the complex video game of <a href="/wiki/Starcraft_II" class="mw-redirect" title="Starcraft II">Starcraft II</a>.<sup id="cite_ref-alphastar_16-1" class="reference"><a href="#cite_note-alphastar-16"><span class="cite-bracket">&#91;</span>16<span class="cite-bracket">&#93;</span></a></sup><sup 
id="cite_ref-miraculous2021_63-1" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="History">History</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=12" title="Edit section: History"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading3"><h3 id="Development">Development</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=13" title="Edit section: Development"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Aspects of LSTM were anticipated by "focused back-propagation" (Mozer, 1989),<sup id="cite_ref-mozer1989_64-0" class="reference"><a href="#cite_note-mozer1989-64"><span class="cite-bracket">&#91;</span>64<span class="cite-bracket">&#93;</span></a></sup> cited by the LSTM paper.<sup id="cite_ref-lstm1997_1-3" class="reference"><a href="#cite_note-lstm1997-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> </p><p>Sepp Hochreiter's 1991 German diploma thesis analyzed the <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a> and developed principles of the method.<sup id="cite_ref-hochreiter1991_2-2" class="reference"><a href="#cite_note-hochreiter1991-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup> His supervisor, <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a>, considered the thesis highly significant.<sup id="cite_ref-DLhistory_65-0" class="reference"><a href="#cite_note-DLhistory-65"><span class="cite-bracket">&#91;</span>65<span 
class="cite-bracket">&#93;</span></a></sup> </p><p>An early version of LSTM was published in 1995 in a technical report by <a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a> and <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a>,<sup id="cite_ref-66" class="reference"><a href="#cite_note-66"><span class="cite-bracket">&#91;</span>66<span class="cite-bracket">&#93;</span></a></sup> then published in the <a href="/wiki/Conference_on_Neural_Information_Processing_Systems" title="Conference on Neural Information Processing Systems">NIPS</a> 1996 conference.<sup id="cite_ref-hochreiter1996_3-1" class="reference"><a href="#cite_note-hochreiter1996-3"><span class="cite-bracket">&#91;</span>3<span class="cite-bracket">&#93;</span></a></sup> </p><p>The most commonly used reference point for LSTM was published in 1997 in the journal <a href="/wiki/Neural_Computation_(journal)" title="Neural Computation (journal)">Neural Computation</a>.<sup id="cite_ref-lstm1997_1-4" class="reference"><a href="#cite_note-lstm1997-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> By introducing Constant Error Carousel (CEC) units, LSTM deals with the <a href="/wiki/Vanishing_gradient_problem" title="Vanishing gradient problem">vanishing gradient problem</a>. 
The initial version of the LSTM block included cells, input gates, and output gates.<sup id="cite_ref-ASearchSpaceOdyssey_20-1" class="reference"><a href="#cite_note-ASearchSpaceOdyssey-20"><span class="cite-bracket">&#91;</span>20<span class="cite-bracket">&#93;</span></a></sup> </p><p>(<a href="/wiki/Felix_Gers" title="Felix Gers">Felix Gers</a>, Jürgen Schmidhuber, and Fred Cummins, 1999)<sup id="cite_ref-lstm1999_67-0" class="reference"><a href="#cite_note-lstm1999-67"><span class="cite-bracket">&#91;</span>67<span class="cite-bracket">&#93;</span></a></sup> introduced the forget gate (also called "keep gate") into the LSTM architecture in 1999, enabling the LSTM to reset its own state.<sup id="cite_ref-ASearchSpaceOdyssey_20-2" class="reference"><a href="#cite_note-ASearchSpaceOdyssey-20"><span class="cite-bracket">&#91;</span>20<span class="cite-bracket">&#93;</span></a></sup> This is the most commonly used version of LSTM nowadays. </p><p>(Gers, Schmidhuber, and Cummins, 2000) added peephole connections.<sup id="cite_ref-peepholeLSTM_21-4" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-peephole2002_22-3" class="reference"><a href="#cite_note-peephole2002-22"><span class="cite-bracket">&#91;</span>22<span class="cite-bracket">&#93;</span></a></sup> Additionally, the output activation function was omitted.<sup id="cite_ref-ASearchSpaceOdyssey_20-3" class="reference"><a href="#cite_note-ASearchSpaceOdyssey-20"><span class="cite-bracket">&#91;</span>20<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Development_of_variants">Development of variants</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=14" title="Edit section: Development of variants"><span>edit</span></a><span 
class="mw-editsection-bracket">]</span></span></div> <p>(Graves, Fernandez, Gomez, and Schmidhuber, 2006)<sup id="cite_ref-graves2006_5-2" class="reference"><a href="#cite_note-graves2006-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup> introduced a new error function for LSTM: <a href="/wiki/Connectionist_Temporal_Classification" class="mw-redirect" title="Connectionist Temporal Classification">Connectionist Temporal Classification</a> (CTC) for simultaneous alignment and recognition of sequences. </p><p>(Graves, Schmidhuber, 2005)<sup id="cite_ref-graves2005_26-1" class="reference"><a href="#cite_note-graves2005-26"><span class="cite-bracket">&#91;</span>26<span class="cite-bracket">&#93;</span></a></sup> published LSTM with full <a href="/wiki/Backpropagation_through_time" title="Backpropagation through time">backpropagation through time</a> and bidirectional LSTM. </p><p>(Kyunghyun Cho et al., 2014)<sup id="cite_ref-cho2014_68-0" class="reference"><a href="#cite_note-cho2014-68"><span class="cite-bracket">&#91;</span>68<span class="cite-bracket">&#93;</span></a></sup> published a simplified variant of the forget gate LSTM<sup id="cite_ref-lstm1999_67-1" class="reference"><a href="#cite_note-lstm1999-67"><span class="cite-bracket">&#91;</span>67<span class="cite-bracket">&#93;</span></a></sup> called <a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit</a> (GRU). 
</p><p>(Rupesh Kumar Srivastava, Klaus Greff, and Schmidhuber, 2015) used LSTM principles<sup id="cite_ref-lstm1999_67-2" class="reference"><a href="#cite_note-lstm1999-67"><span class="cite-bracket">&#91;</span>67<span class="cite-bracket">&#93;</span></a></sup> to create the <a href="/wiki/Highway_network" title="Highway network">Highway network</a>, a <a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">feedforward neural network</a> with hundreds of layers, much deeper than previous networks.<sup id="cite_ref-highway2015_69-0" class="reference"><a href="#cite_note-highway2015-69"><span class="cite-bracket">&#91;</span>69<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-highway2015neurips_70-0" class="reference"><a href="#cite_note-highway2015neurips-70"><span class="cite-bracket">&#91;</span>70<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-mostcited2021_71-0" class="reference"><a href="#cite_note-mostcited2021-71"><span class="cite-bracket">&#91;</span>71<span class="cite-bracket">&#93;</span></a></sup> Concurrently, the <a href="/wiki/Residual_neural_network" title="Residual neural network">ResNet</a> architecture was developed. 
It is equivalent to an open-gated or gateless highway network.<sup id="cite_ref-resnet2015_72-0" class="reference"><a href="#cite_note-resnet2015-72"><span class="cite-bracket">&#91;</span>72<span class="cite-bracket">&#93;</span></a></sup> </p><p>A modern upgrade of LSTM called <a href="/w/index.php?title=XLSTM&amp;action=edit&amp;redlink=1" class="new" title="XLSTM (page does not exist)">xLSTM</a> was published by a team led by <a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a> (Beck et al., 2024).<sup id="cite_ref-73" class="reference"><a href="#cite_note-73"><span class="cite-bracket">&#91;</span>73<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-74" class="reference"><a href="#cite_note-74"><span class="cite-bracket">&#91;</span>74<span class="cite-bracket">&#93;</span></a></sup> One of the two blocks of the architecture (mLSTM) is parallelizable like the <a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> architecture; the other (sLSTM) allows state tracking. 
</p> <div class="mw-heading mw-heading3"><h3 id="Applications_2">Applications</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=15" title="Edit section: Applications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>2004:</b> First successful application of LSTM to speech by <a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a> et al.<sup id="cite_ref-graves2004_75-0" class="reference"><a href="#cite_note-graves2004-75"><span class="cite-bracket">&#91;</span>75<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-miraculous2021_63-2" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2001:</b> Gers and Schmidhuber trained LSTM to learn languages unlearnable by traditional models such as Hidden Markov Models.<sup id="cite_ref-peepholeLSTM_21-5" class="reference"><a href="#cite_note-peepholeLSTM-21"><span class="cite-bracket">&#91;</span>21<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-miraculous2021_63-3" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p><p>Hochreiter et al. used LSTM for <a href="/wiki/Meta-learning_(computer_science)" title="Meta-learning (computer science)">meta-learning</a> (i.e. 
learning a learning algorithm).<sup id="cite_ref-76" class="reference"><a href="#cite_note-76"><span class="cite-bracket">&#91;</span>76<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2005:</b> Daan Wierstra, Faustino Gomez, and Schmidhuber trained LSTM by <a href="/wiki/Neuroevolution" title="Neuroevolution">neuroevolution</a> without a teacher.<sup id="cite_ref-wierstra2005_7-3" class="reference"><a href="#cite_note-wierstra2005-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup> </p><p>Mayer et al. trained LSTM to control <a href="/wiki/Robot" title="Robot">robots</a>.<sup id="cite_ref-mayer2006_13-2" class="reference"><a href="#cite_note-mayer2006-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2007:</b> Wierstra, Foerster, Peters, and Schmidhuber trained LSTM by policy gradients for <a href="/wiki/Reinforcement_learning" title="Reinforcement learning">reinforcement learning</a> without a teacher.<sup id="cite_ref-wierstra2007_77-0" class="reference"><a href="#cite_note-wierstra2007-77"><span class="cite-bracket">&#91;</span>77<span class="cite-bracket">&#93;</span></a></sup> </p><p>Hochreiter, Heusel, and Obermayer applied LSTM to protein homology detection in the field of <a href="/wiki/Biology" title="Biology">biology</a>.<sup id="cite_ref-hochreiter2007_37-1" class="reference"><a href="#cite_note-hochreiter2007-37"><span class="cite-bracket">&#91;</span>37<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2009:</b> Justin Bayer et al. 
introduced <a href="/wiki/Neural_architecture_search" title="Neural architecture search">neural architecture search</a> for LSTM.<sup id="cite_ref-bayer2009_78-0" class="reference"><a href="#cite_note-bayer2009-78"><span class="cite-bracket">&#91;</span>78<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-miraculous2021_63-4" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2009:</b> An LSTM trained by CTC won the <a href="/wiki/ICDAR" class="mw-redirect" title="ICDAR">ICDAR</a> connected handwriting recognition competition. Three such models were submitted by a team led by <a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a>.<sup id="cite_ref-graves2009_79-0" class="reference"><a href="#cite_note-graves2009-79"><span class="cite-bracket">&#91;</span>79<span class="cite-bracket">&#93;</span></a></sup> One was the most accurate model in the competition and another was the fastest.<sup id="cite_ref-maergner2009_80-0" class="reference"><a href="#cite_note-maergner2009-80"><span class="cite-bracket">&#91;</span>80<span class="cite-bracket">&#93;</span></a></sup> This was the first time an RNN won international competitions.<sup id="cite_ref-miraculous2021_63-5" class="reference"><a href="#cite_note-miraculous2021-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup> </p><p><b>2013:</b> Alex Graves, Abdel-rahman Mohamed, and Geoffrey Hinton used LSTM networks as a major component of a network that achieved a record 17.7% <a href="/wiki/Phoneme" title="Phoneme">phoneme</a> error rate on the classic <a href="/wiki/TIMIT" title="TIMIT">TIMIT</a> natural speech dataset.<sup id="cite_ref-graves2013_28-1" class="reference"><a href="#cite_note-graves2013-28"><span class="cite-bracket">&#91;</span>28<span class="cite-bracket">&#93;</span></a></sup> 
</p><p>Researchers from <a href="/wiki/Michigan_State_University" title="Michigan State University">Michigan State University</a>, <a href="/wiki/IBM_Research" title="IBM Research">IBM Research</a>, and <a href="/wiki/Cornell_University" title="Cornell University">Cornell University</a> published a study in the Knowledge Discovery and Data Mining (KDD) conference.<sup id="cite_ref-81" class="reference"><a href="#cite_note-81"><span class="cite-bracket">&#91;</span>81<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-82" class="reference"><a href="#cite_note-82"><span class="cite-bracket">&#91;</span>82<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-83" class="reference"><a href="#cite_note-83"><span class="cite-bracket">&#91;</span>83<span class="cite-bracket">&#93;</span></a></sup> Their Time-Aware LSTM (T-LSTM) performs better on certain data sets than standard LSTM. </p> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=16" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1184024115" /><div class="div-col" style="column-width: 20em;"> <ul><li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention (machine learning)</a></li> <li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Long-term_potentiation" title="Long-term potentiation">Long-term 
potentiation</a></li> <li><a href="/wiki/Prefrontal_cortex_basal_ganglia_working_memory" title="Prefrontal cortex basal ganglia working memory">Prefrontal cortex basal ganglia working memory</a></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/Time_aware_long_short-term_memory" title="Time aware long short-term memory">Time aware long short-term memory</a></li> <li><a href="/wiki/Transformer_(machine_learning_model)" class="mw-redirect" title="Transformer (machine learning model)">Transformer (machine learning model)</a></li> <li><a href="/wiki/Time_series" title="Time series">Time series</a></li></ul> </div> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=17" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output 
.reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-lstm1997-1"><span class="mw-cite-backlink">^ <a href="#cite_ref-lstm1997_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-lstm1997_1-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-lstm1997_1-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-lstm1997_1-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-lstm1997_1-4"><sup><i><b>e</b></i></sup></a></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 
0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFSepp_HochreiterJürgen_Schmidhuber1997" class="citation journal cs1"><a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a>; <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a> (1997). <a rel="nofollow" class="external text" href="https://www.researchgate.net/publication/13853244">"Long short-term memory"</a>. <i><a href="/wiki/Neural_Computation_(journal)" title="Neural Computation (journal)">Neural Computation</a></i>. <b>9</b> (8): <span class="nowrap">1735–</span>1780. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1162%2Fneco.1997.9.8.1735">10.1162/neco.1997.9.8.1735</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/9377276">9377276</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:1915014">1915014</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Computation&amp;rft.atitle=Long+short-term+memory&amp;rft.volume=9&amp;rft.issue=8&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1735-%3C%2Fspan%3E1780&amp;rft.date=1997&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A1915014%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F9377276&amp;rft_id=info%3Adoi%2F10.1162%2Fneco.1997.9.8.1735&amp;rft.au=Sepp+Hochreiter&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rft_id=https%3A%2F%2Fwww.researchgate.net%2Fpublication%2F13853244&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-hochreiter1991-2"><span class="mw-cite-backlink">^ <a href="#cite_ref-hochreiter1991_2-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-hochreiter1991_2-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-hochreiter1991_2-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHochreiter1991" class="citation thesis cs1">Hochreiter, Sepp (1991). <a rel="nofollow" class="external text" href="http://www.bioinf.jku.at/publications/older/3804.pdf"><i>Untersuchungen zu dynamischen neuronalen Netzen</i></a> <span class="cs1-format">(PDF)</span> (diploma thesis). 
Technical University Munich, Institute of Computer Science.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adissertation&amp;rft.title=Untersuchungen+zu+dynamischen+neuronalen+Netzen&amp;rft.degree=diploma&amp;rft.inst=Technical+University+Munich%2C+Institute+of+Computer+Science&amp;rft.date=1991&amp;rft.aulast=Hochreiter&amp;rft.aufirst=Sepp&amp;rft_id=http%3A%2F%2Fwww.bioinf.jku.at%2Fpublications%2Folder%2F3804.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-hochreiter1996-3"><span class="mw-cite-backlink">^ <a href="#cite_ref-hochreiter1996_3-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-hochreiter1996_3-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHochreiterSchmidhuber1996" class="citation journal cs1">Hochreiter, Sepp; Schmidhuber, Jürgen (1996-12-03). <a rel="nofollow" class="external text" href="https://dl.acm.org/doi/10.5555/2998981.2999048">"LSTM can solve hard long time lag problems"</a>. <i>Proceedings of the 9th International Conference on Neural Information Processing Systems</i>. NIPS'96. 
Cambridge, MA, USA: MIT Press: <span class="nowrap">473–</span>479.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+9th+International+Conference+on+Neural+Information+Processing+Systems&amp;rft.atitle=LSTM+can+solve+hard+long+time+lag+problems&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E473-%3C%2Fspan%3E479&amp;rft.date=1996-12-03&amp;rft.aulast=Hochreiter&amp;rft.aufirst=Sepp&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rft_id=https%3A%2F%2Fdl.acm.org%2Fdoi%2F10.5555%2F2998981.2999048&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-lstm2000-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-lstm2000_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-lstm2000_4-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-lstm2000_4-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFFelix_A._GersJürgen_SchmidhuberFred_Cummins2000" class="citation journal cs1">Felix A. Gers; Jürgen Schmidhuber; Fred Cummins (2000). "Learning to Forget: Continual Prediction with LSTM". <i><a href="/wiki/Neural_Computation_(journal)" title="Neural Computation (journal)">Neural Computation</a></i>. <b>12</b> (10): <span class="nowrap">2451–</span>2471. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.5709">10.1.1.55.5709</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1162%2F089976600300015015">10.1162/089976600300015015</a>. 
<a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/11032042">11032042</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:11598600">11598600</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Computation&amp;rft.atitle=Learning+to+Forget%3A+Continual+Prediction+with+LSTM&amp;rft.volume=12&amp;rft.issue=10&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E2451-%3C%2Fspan%3E2471&amp;rft.date=2000&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.55.5709%23id-name%3DCiteSeerX&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A11598600%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F11032042&amp;rft_id=info%3Adoi%2F10.1162%2F089976600300015015&amp;rft.au=Felix+A.+Gers&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rft.au=Fred+Cummins&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2006-5"><span class="mw-cite-backlink">^ <a href="#cite_ref-graves2006_5-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-graves2006_5-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-graves2006_5-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesFernándezGomezSchmidhuber2006" class="citation journal cs1">Graves, Alex; Fernández, Santiago; Gomez, Faustino; Schmidhuber, Jürgen (2006). "Connectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks". <i>In Proceedings of the International Conference on Machine Learning, ICML 2006</i>: <span class="nowrap">369–</span>376. 
<a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.75.6306">10.1.1.75.6306</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=In+Proceedings+of+the+International+Conference+on+Machine+Learning%2C+ICML+2006&amp;rft.atitle=Connectionist+temporal+classification%3A+Labelling+unsegmented+sequence+data+with+recurrent+neural+networks&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E369-%3C%2Fspan%3E376&amp;rft.date=2006&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.75.6306%23id-name%3DCiteSeerX&amp;rft.aulast=Graves&amp;rft.aufirst=Alex&amp;rft.au=Fern%C3%A1ndez%2C+Santiago&amp;rft.au=Gomez%2C+Faustino&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKarimMajumdarDarabiChen2018" class="citation journal cs1">Karim, Fazle; Majumdar, Somshubra; Darabi, Houshang; Chen, Shun (2018). "LSTM Fully Convolutional Networks for Time Series Classification". <i>IEEE Access</i>. <b>6</b>: <span class="nowrap">1662–</span>1669. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1709.05206">1709.05206</a></span>. 
<a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2018IEEEA...6.1662K">2018IEEEA...6.1662K</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FACCESS.2017.2779939">10.1109/ACCESS.2017.2779939</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/2169-3536">2169-3536</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE+Access&amp;rft.atitle=LSTM+Fully+Convolutional+Networks+for+Time+Series+Classification&amp;rft.volume=6&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1662-%3C%2Fspan%3E1669&amp;rft.date=2018&amp;rft_id=info%3Aarxiv%2F1709.05206&amp;rft.issn=2169-3536&amp;rft_id=info%3Adoi%2F10.1109%2FACCESS.2017.2779939&amp;rft_id=info%3Abibcode%2F2018IEEEA...6.1662K&amp;rft.aulast=Karim&amp;rft.aufirst=Fazle&amp;rft.au=Majumdar%2C+Somshubra&amp;rft.au=Darabi%2C+Houshang&amp;rft.au=Chen%2C+Shun&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-wierstra2005-7"><span class="mw-cite-backlink">^ <a href="#cite_ref-wierstra2005_7-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-wierstra2005_7-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-wierstra2005_7-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-wierstra2005_7-3"><sup><i><b>d</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWierstraSchmidhuberGomez2005" class="citation journal cs1">Wierstra, Daan; Schmidhuber, J.; Gomez, F. J. (2005). 
<a rel="nofollow" class="external text" href="https://www.academia.edu/5830256">"Evolino: Hybrid Neuroevolution/Optimal Linear Search for Sequence Learning"</a>. <i>Proceedings of the 19th International Joint Conference on Artificial Intelligence (IJCAI), Edinburgh</i>: <span class="nowrap">853–</span>858.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+19th+International+Joint+Conference+on+Artificial+Intelligence+%28IJCAI%29%2C+Edinburgh&amp;rft.atitle=Evolino%3A+Hybrid+Neuroevolution%2FOptimal+Linear+Search+for+Sequence+Learning&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E853-%3C%2Fspan%3E858&amp;rft.date=2005&amp;rft.aulast=Wierstra&amp;rft.aufirst=Daan&amp;rft.au=Schmidhuber%2C+J.&amp;rft.au=Gomez%2C+F.+J.&amp;rft_id=https%3A%2F%2Fwww.academia.edu%2F5830256&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-sak2014-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-sak2014_8-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSakSeniorBeaufays2014" class="citation web cs1">Sak, Hasim; Senior, Andrew; Beaufays, Francoise (2014). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20180424203806/https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf">"Long Short-Term Memory recurrent neural network architectures for large scale acoustic modeling"</a> <span class="cs1-format">(PDF)</span>. 
Archived from <a rel="nofollow" class="external text" href="https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 2018-04-24.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Long+Short-Term+Memory+recurrent+neural+network+architectures+for+large+scale+acoustic+modeling&amp;rft.date=2014&amp;rft.aulast=Sak&amp;rft.aufirst=Hasim&amp;rft.au=Senior%2C+Andrew&amp;rft.au=Beaufays%2C+Francoise&amp;rft_id=https%3A%2F%2Fstatic.googleusercontent.com%2Fmedia%2Fresearch.google.com%2Fen%2F%2Fpubs%2Farchive%2F43905.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-liwu2015-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-liwu2015_9-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFLiWu2014" class="citation arxiv cs1">Li, Xiangang; Wu, Xihong (2014-10-15). "Constructing Long Short-Term Memory based Deep Recurrent Neural Networks for Large Vocabulary Speech Recognition". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1410.4281">1410.4281</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Constructing+Long+Short-Term+Memory+based+Deep+Recurrent+Neural+Networks+for+Large+Vocabulary+Speech+Recognition&amp;rft.date=2014-10-15&amp;rft_id=info%3Aarxiv%2F1410.4281&amp;rft.aulast=Li&amp;rft.aufirst=Xiangang&amp;rft.au=Wu%2C+Xihong&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-GoogleTranslate-10"><span class="mw-cite-backlink">^ <a href="#cite_ref-GoogleTranslate_10-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-GoogleTranslate_10-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWuSchusterChenLe2016" class="citation arxiv cs1">Wu, Yonghui; Schuster, Mike; Chen, Zhifeng; Le, Quoc V.; Norouzi, Mohammad; Macherey, Wolfgang; Krikun, Maxim; Cao, Yuan; Gao, Qin (2016-09-26). "Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1609.08144">1609.08144</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Google%27s+Neural+Machine+Translation+System%3A+Bridging+the+Gap+between+Human+and+Machine+Translation&amp;rft.date=2016-09-26&amp;rft_id=info%3Aarxiv%2F1609.08144&amp;rft.aulast=Wu&amp;rft.aufirst=Yonghui&amp;rft.au=Schuster%2C+Mike&amp;rft.au=Chen%2C+Zhifeng&amp;rft.au=Le%2C+Quoc+V.&amp;rft.au=Norouzi%2C+Mohammad&amp;rft.au=Macherey%2C+Wolfgang&amp;rft.au=Krikun%2C+Maxim&amp;rft.au=Cao%2C+Yuan&amp;rft.au=Gao%2C+Qin&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-FacebookTranslate-11"><span class="mw-cite-backlink">^ <a href="#cite_ref-FacebookTranslate_11-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-FacebookTranslate_11-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFOng2017" class="citation web cs1">Ong, Thuy (4 August 2017). <a rel="nofollow" class="external text" href="https://www.theverge.com/2017/8/4/16093872/facebook-ai-translations-artificial-intelligence">"Facebook's translations are now powered completely by AI"</a>. <i>The Verge</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2019-02-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=The+Verge&amp;rft.atitle=Facebook%27s+translations+are+now+powered+completely+by+AI&amp;rft.date=2017-08-04&amp;rft.aulast=Ong&amp;rft.aufirst=Thuy&amp;rft_id=https%3A%2F%2Fwww.theverge.com%2F2017%2F8%2F4%2F16093872%2Ffacebook-ai-translations-artificial-intelligence&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSahidullahPatinoCornellYin2019" class="citation arxiv cs1">Sahidullah, Md; Patino, Jose; Cornell, Samuele; Yin, Ruiking; Sivasankaran, Sunit; Bredin, Herve; Korshunov, Pavel; Brutti, Alessio; Serizel, Romain; Vincent, Emmanuel; Evans, Nicholas; Marcel, Sebastien; Squartini, Stefano; Barras, Claude (2019-11-06). "The Speed Submission to DIHARD II: Contributions &amp; Lessons Learned". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1911.02388">1911.02388</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/eess.AS">eess.AS</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=The+Speed+Submission+to+DIHARD+II%3A+Contributions+%26+Lessons+Learned&amp;rft.date=2019-11-06&amp;rft_id=info%3Aarxiv%2F1911.02388&amp;rft.aulast=Sahidullah&amp;rft.aufirst=Md&amp;rft.au=Patino%2C+Jose&amp;rft.au=Cornell%2C+Samuele&amp;rft.au=Yin%2C+Ruiking&amp;rft.au=Sivasankaran%2C+Sunit&amp;rft.au=Bredin%2C+Herve&amp;rft.au=Korshunov%2C+Pavel&amp;rft.au=Brutti%2C+Alessio&amp;rft.au=Serizel%2C+Romain&amp;rft.au=Vincent%2C+Emmanuel&amp;rft.au=Evans%2C+Nicholas&amp;rft.au=Marcel%2C+Sebastien&amp;rft.au=Squartini%2C+Stefano&amp;rft.au=Barras%2C+Claude&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-mayer2006-13"><span class="mw-cite-backlink">^ <a href="#cite_ref-mayer2006_13-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-mayer2006_13-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-mayer2006_13-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMayerGomezWierstraNagy2006" class="citation book cs1">Mayer, H.; Gomez, F.; Wierstra, D.; Nagy, I.; Knoll, A.; Schmidhuber, J. (October 2006). "A System for Robotic Heart Surgery that Learns to Tie Knots Using Recurrent Neural Networks". <i>2006 IEEE/RSJ International Conference on Intelligent Robots and Systems</i>. pp.&#160;<span class="nowrap">543–</span>548. 
<a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.218.3399">10.1.1.218.3399</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FIROS.2006.282190">10.1109/IROS.2006.282190</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-4244-0258-8" title="Special:BookSources/978-1-4244-0258-8"><bdi>978-1-4244-0258-8</bdi></a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:12284900">12284900</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=A+System+for+Robotic+Heart+Surgery+that+Learns+to+Tie+Knots+Using+Recurrent+Neural+Networks&amp;rft.btitle=2006+IEEE%2FRSJ+International+Conference+on+Intelligent+Robots+and+Systems&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E543-%3C%2Fspan%3E548&amp;rft.date=2006-10&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.218.3399%23id-name%3DCiteSeerX&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A12284900%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1109%2FIROS.2006.282190&amp;rft.isbn=978-1-4244-0258-8&amp;rft.aulast=Mayer&amp;rft.aufirst=H.&amp;rft.au=Gomez%2C+F.&amp;rft.au=Wierstra%2C+D.&amp;rft.au=Nagy%2C+I.&amp;rft.au=Knoll%2C+A.&amp;rft.au=Schmidhuber%2C+J.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-OpenAIhand-14"><span class="mw-cite-backlink">^ <a 
href="#cite_ref-OpenAIhand_14-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-OpenAIhand_14-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation news cs1"><a rel="nofollow" class="external text" href="https://openai.com/research/learning-dexterity/">"Learning Dexterity"</a>. <i>OpenAI</i>. July 30, 2018<span class="reference-accessdate">. Retrieved <span class="nowrap">2023-06-28</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=OpenAI&amp;rft.atitle=Learning+Dexterity&amp;rft.date=2018-07-30&amp;rft_id=https%3A%2F%2Fopenai.com%2Fresearch%2Flearning-dexterity%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-OpenAIfive-15"><span class="mw-cite-backlink">^ <a href="#cite_ref-OpenAIfive_15-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-OpenAIfive_15-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFRodriguez2018" class="citation news cs1">Rodriguez, Jesus (July 2, 2018). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191226222000/https://towardsdatascience.com/the-science-behind-openai-five-that-just-produced-one-of-the-greatest-breakthrough-in-the-history-b045bcdc2b69?gi=24b20ef8ca3f">"The Science Behind OpenAI Five that just Produced One of the Greatest Breakthrough in the History of AI"</a>. <i>Towards Data Science</i>. Archived from <a rel="nofollow" class="external text" href="https://towardsdatascience.com/the-science-behind-openai-five-that-just-produced-one-of-the-greatest-breakthrough-in-the-history-b045bcdc2b69">the original</a> on 2019-12-26<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2019-01-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Towards+Data+Science&amp;rft.atitle=The+Science+Behind+OpenAI+Five+that+just+Produced+One+of+the+Greatest+Breakthrough+in+the+History+of+AI&amp;rft.date=2018-07-02&amp;rft.aulast=Rodriguez&amp;rft.aufirst=Jesus&amp;rft_id=https%3A%2F%2Ftowardsdatascience.com%2Fthe-science-behind-openai-five-that-just-produced-one-of-the-greatest-breakthrough-in-the-history-b045bcdc2b69&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-alphastar-16"><span class="mw-cite-backlink">^ <a href="#cite_ref-alphastar_16-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-alphastar_16-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFStanford2019" class="citation news cs1">Stanford, Stacy (January 25, 2019). <a rel="nofollow" class="external text" href="https://medium.com/mlmemoirs/deepminds-ai-alphastar-showcases-significant-progress-towards-agi-93810c94fbe9">"DeepMind's AI, AlphaStar Showcases Significant Progress Towards AGI"</a>. <i>Medium ML Memoirs</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2019-01-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Medium+ML+Memoirs&amp;rft.atitle=DeepMind%27s+AI%2C+AlphaStar+Showcases+Significant+Progress+Towards+AGI&amp;rft.date=2019-01-25&amp;rft.aulast=Stanford&amp;rft.aufirst=Stacy&amp;rft_id=https%3A%2F%2Fmedium.com%2Fmlmemoirs%2Fdeepminds-ai-alphastar-showcases-significant-progress-towards-agi-93810c94fbe9&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-decade2022-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-decade2022_17-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchmidhuber2021" class="citation news cs1">Schmidhuber, Jürgen (2021). <a rel="nofollow" class="external text" href="https://people.idsia.ch/~juergen/2010s-our-decade-of-deep-learning.html">"The 2010s: Our Decade of Deep Learning / Outlook on the 2020s"</a>. <i>AI Blog</i>. IDSIA, Switzerland<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2022-04-30</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=AI+Blog&amp;rft.atitle=The+2010s%3A+Our+Decade+of+Deep+Learning+%2F+Outlook+on+the+2020s&amp;rft.date=2021&amp;rft.aulast=Schmidhuber&amp;rft.aufirst=J%C3%BCrgen&amp;rft_id=https%3A%2F%2Fpeople.idsia.ch%2F~juergen%2F2010s-our-decade-of-deep-learning.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-calin2020-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-calin2020_18-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFCalin2020" class="citation book cs1">Calin, Ovidiu (14 February 2020). <i>Deep Learning Architectures</i>. Cham, Switzerland: Springer Nature. p.&#160;555. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3-030-36720-6" title="Special:BookSources/978-3-030-36720-6"><bdi>978-3-030-36720-6</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Deep+Learning+Architectures&amp;rft.place=Cham%2C+Switzerland&amp;rft.pages=555&amp;rft.pub=Springer+Nature&amp;rft.date=2020-02-14&amp;rft.isbn=978-3-030-36720-6&amp;rft.aulast=Calin&amp;rft.aufirst=Ovidiu&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-LakretzKruszewskiDesbordes2019-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-LakretzKruszewskiDesbordes2019_19-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFLakretzKruszewskiDesbordesHupkes2019" class="citation cs2">Lakretz, Yair; Kruszewski, German; 
Desbordes, Theo; Hupkes, Dieuwke; Dehaene, Stanislas; Baroni, Marco (2019), <a rel="nofollow" class="external text" href="https://aclanthology.org/N19-1002/">"The emergence of number and syntax units in LSTM language models"</a>, <a rel="nofollow" class="external text" href="https://pure.uva.nl/ws/files/49723040/N19_1002.pdf"><i>The emergence of number and syntax units</i></a> <span class="cs1-format">(PDF)</span>, Association for Computational Linguistics, pp.&#160;<span class="nowrap">11–</span>20, <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.18653%2Fv1%2FN19-1002">10.18653/v1/N19-1002</a>, <a href="/wiki/Hdl_(identifier)" class="mw-redirect" title="Hdl (identifier)">hdl</a>:<a rel="nofollow" class="external text" href="https://hdl.handle.net/11245.1%2F16cb6800-e10d-4166-8e0b-fed61ca6ebb4">11245.1/16cb6800-e10d-4166-8e0b-fed61ca6ebb4</a>, <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:81978369">81978369</a></cite><span 
title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=The+emergence+of+number+and+syntax+units+in+LSTM+language+models&amp;rft.btitle=The+emergence+of+number+and+syntax+units&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E11-%3C%2Fspan%3E20&amp;rft.pub=Association+for+Computational+Linguistics&amp;rft.date=2019&amp;rft_id=info%3Ahdl%2F11245.1%2F16cb6800-e10d-4166-8e0b-fed61ca6ebb4&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A81978369%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.18653%2Fv1%2FN19-1002&amp;rft.aulast=Lakretz&amp;rft.aufirst=Yair&amp;rft.au=Kruszewski%2C+German&amp;rft.au=Desbordes%2C+Theo&amp;rft.au=Hupkes%2C+Dieuwke&amp;rft.au=Dehaene%2C+Stanislas&amp;rft.au=Baroni%2C+Marco&amp;rft_id=https%3A%2F%2Faclanthology.org%2FN19-1002%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-ASearchSpaceOdyssey-20"><span class="mw-cite-backlink">^ <a href="#cite_ref-ASearchSpaceOdyssey_20-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-ASearchSpaceOdyssey_20-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-ASearchSpaceOdyssey_20-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-ASearchSpaceOdyssey_20-3"><sup><i><b>d</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKlaus_GreffRupesh_Kumar_SrivastavaJan_KoutníkBas_R._Steunebrink2015" class="citation journal cs1">Klaus Greff; Rupesh Kumar Srivastava; Jan Koutník; Bas R. Steunebrink; Jürgen Schmidhuber (2015). "LSTM: A Search Space Odyssey". <i>IEEE Transactions on Neural Networks and Learning Systems</i>. <b>28</b> (10): <span class="nowrap">2222–</span>2232. 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1503.04069">1503.04069</a></span>. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2015arXiv150304069G">2015arXiv150304069G</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FTNNLS.2016.2582924">10.1109/TNNLS.2016.2582924</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/27411231">27411231</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:3356463">3356463</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE+Transactions+on+Neural+Networks+and+Learning+Systems&amp;rft.atitle=LSTM%3A+A+Search+Space+Odyssey&amp;rft.volume=28&amp;rft.issue=10&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E2222-%3C%2Fspan%3E2232&amp;rft.date=2015&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A3356463%23id-name%3DS2CID&amp;rft_id=info%3Abibcode%2F2015arXiv150304069G&amp;rft_id=info%3Aarxiv%2F1503.04069&amp;rft_id=info%3Apmid%2F27411231&amp;rft_id=info%3Adoi%2F10.1109%2FTNNLS.2016.2582924&amp;rft.au=Klaus+Greff&amp;rft.au=Rupesh+Kumar+Srivastava&amp;rft.au=Jan+Koutn%C3%ADk&amp;rft.au=Bas+R.+Steunebrink&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-peepholeLSTM-21"><span 
class="mw-cite-backlink">^ <a href="#cite_ref-peepholeLSTM_21-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-peepholeLSTM_21-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-peepholeLSTM_21-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-peepholeLSTM_21-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-peepholeLSTM_21-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-peepholeLSTM_21-5"><sup><i><b>f</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGersSchmidhuber2001" class="citation journal cs1">Gers, F. A.; Schmidhuber, J. (2001). <a rel="nofollow" class="external text" href="ftp://ftp.idsia.ch/pub/juergen/L-IEEE.pdf">"LSTM Recurrent Networks Learn Simple Context Free and Context Sensitive Languages"</a> <span class="cs1-format">(PDF)</span>. <i>IEEE Transactions on Neural Networks</i>. <b>12</b> (6): <span class="nowrap">1333–</span>1340. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2F72.963769">10.1109/72.963769</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/18249962">18249962</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:10192330">10192330</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE+Transactions+on+Neural+Networks&amp;rft.atitle=LSTM+Recurrent+Networks+Learn+Simple+Context+Free+and+Context+Sensitive+Languages&amp;rft.volume=12&amp;rft.issue=6&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1333-%3C%2Fspan%3E1340&amp;rft.date=2001&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A10192330%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F18249962&amp;rft_id=info%3Adoi%2F10.1109%2F72.963769&amp;rft.aulast=Gers&amp;rft.aufirst=F.+A.&amp;rft.au=Schmidhuber%2C+J.&amp;rft_id=ftp%3A%2F%2Fftp.idsia.ch%2Fpub%2Fjuergen%2FL-IEEE.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-peephole2002-22"><span class="mw-cite-backlink">^ <a href="#cite_ref-peephole2002_22-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-peephole2002_22-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-peephole2002_22-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-peephole2002_22-3"><sup><i><b>d</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGersSchraudolphSchmidhuber2002" class="citation journal cs1">Gers, F.; Schraudolph, N.; Schmidhuber, J. (2002). <a rel="nofollow" class="external text" href="http://www.jmlr.org/papers/volume3/gers02a/gers02a.pdf">"Learning precise timing with LSTM recurrent networks"</a> <span class="cs1-format">(PDF)</span>. <i>Journal of Machine Learning Research</i>. 
<b>3</b>: <span class="nowrap">115–</span>143.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Journal+of+Machine+Learning+Research&amp;rft.atitle=Learning+precise+timing+with+LSTM+recurrent+networks&amp;rft.volume=3&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E115-%3C%2Fspan%3E143&amp;rft.date=2002&amp;rft.aulast=Gers&amp;rft.aufirst=F.&amp;rft.au=Schraudolph%2C+N.&amp;rft.au=Schmidhuber%2C+J.&amp;rft_id=http%3A%2F%2Fwww.jmlr.org%2Fpapers%2Fvolume3%2Fgers02a%2Fgers02a.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-shi2015-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-shi2015_23-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFXingjian_ShiZhourong_ChenHao_WangDit-Yan_Yeung2015" class="citation journal cs1">Xingjian Shi; Zhourong Chen; Hao Wang; Dit-Yan Yeung; Wai-kin Wong; Wang-chun Woo (2015). "Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting". <i>Proceedings of the 28th International Conference on Neural Information Processing Systems</i>: <span class="nowrap">802–</span>810. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1506.04214">1506.04214</a></span>. 
<a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2015arXiv150604214S">2015arXiv150604214S</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+28th+International+Conference+on+Neural+Information+Processing+Systems&amp;rft.atitle=Convolutional+LSTM+Network%3A+A+Machine+Learning+Approach+for+Precipitation+Nowcasting&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E802-%3C%2Fspan%3E810&amp;rft.date=2015&amp;rft_id=info%3Aarxiv%2F1506.04214&amp;rft_id=info%3Abibcode%2F2015arXiv150604214S&amp;rft.au=Xingjian+Shi&amp;rft.au=Zhourong+Chen&amp;rft.au=Hao+Wang&amp;rft.au=Dit-Yan+Yeung&amp;rft.au=Wai-kin+Wong&amp;rft.au=Wang-chun+Woo&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-gradf-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-gradf_24-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHochreiterBengioFrasconiSchmidhuber2001" class="citation book cs1">Hochreiter, S.; Bengio, Y.; Frasconi, P.; Schmidhuber, J. (2001). <a rel="nofollow" class="external text" href="https://www.researchgate.net/publication/2839938">"Gradient Flow in Recurrent Nets: the Difficulty of Learning Long-Term Dependencies (PDF Download Available)"</a>. In Kremer, S. C.; Kolen, J. F. (eds.). <i>A Field Guide to Dynamical Recurrent Neural Networks</i>. 
IEEE Press.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Gradient+Flow+in+Recurrent+Nets%3A+the+Difficulty+of+Learning+Long-Term+Dependencies+%28PDF+Download+Available%29&amp;rft.btitle=A+Field+Guide+to+Dynamical+Recurrent+Neural+Networks.&amp;rft.pub=IEEE+Press&amp;rft.date=2001&amp;rft.aulast=Hochreiter&amp;rft.aufirst=S.&amp;rft.au=Bengio%2C+Y.&amp;rft.au=Frasconi%2C+P.&amp;rft.au=Schmidhuber%2C+J.&amp;rft_id=https%3A%2F%2Fwww.researchgate.net%2Fpublication%2F2839938&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-fernandez2007ijcai-25"><span class="mw-cite-backlink"><b><a href="#cite_ref-fernandez2007ijcai_25-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFFernándezGravesSchmidhuber2007" class="citation journal cs1">Fernández, Santiago; Graves, Alex; Schmidhuber, Jürgen (2007). "Sequence labelling in structured domains with hierarchical recurrent neural networks". <i>Proc. 20th Int. Joint Conf. On Artificial Intelligence, Ijcai 2007</i>: <span class="nowrap">774–</span>779. 
<a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.1887">10.1.1.79.1887</a></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proc.+20th+Int.+Joint+Conf.+On+Artificial+Intelligence%2C+Ijcai+2007&amp;rft.atitle=Sequence+labelling+in+structured+domains+with+hierarchical+recurrent+neural+networks&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E774-%3C%2Fspan%3E779&amp;rft.date=2007&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.79.1887%23id-name%3DCiteSeerX&amp;rft.aulast=Fern%C3%A1ndez&amp;rft.aufirst=Santiago&amp;rft.au=Graves%2C+Alex&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2005-26"><span class="mw-cite-backlink">^ <a href="#cite_ref-graves2005_26-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-graves2005_26-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesSchmidhuber2005" class="citation journal cs1">Graves, A.; Schmidhuber, J. (2005). "Framewise phoneme classification with bidirectional LSTM and other neural network architectures". <i>Neural Networks</i>. <b>18</b> (<span class="nowrap">5–</span>6): <span class="nowrap">602–</span>610. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.331.5800">10.1.1.331.5800</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.neunet.2005.06.042">10.1016/j.neunet.2005.06.042</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/16112549">16112549</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:1856462">1856462</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Networks&amp;rft.atitle=Framewise+phoneme+classification+with+bidirectional+LSTM+and+other+neural+network+architectures&amp;rft.volume=18&amp;rft.issue=%3Cspan+class%3D%22nowrap%22%3E5%E2%80%93%3C%2Fspan%3E6&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E602-%3C%2Fspan%3E610&amp;rft.date=2005&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.331.5800%23id-name%3DCiteSeerX&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A1856462%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F16112549&amp;rft_id=info%3Adoi%2F10.1016%2Fj.neunet.2005.06.042&amp;rft.aulast=Graves&amp;rft.aufirst=A.&amp;rft.au=Schmidhuber%2C+J.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-fernandez2007icann-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-fernandez2007icann_27-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFFernándezGravesSchmidhuber2007" class="citation journal cs1">Fernández, S.; Graves, A.; Schmidhuber, J. (9 September 2007). 
<a rel="nofollow" class="external text" href="http://dl.acm.org/citation.cfm?id=1778066.1778092">"An Application of Recurrent Neural Networks to Discriminative Keyword Spotting"</a>. <i>Proceedings of the 17th International Conference on Artificial Neural Networks</i>. ICANN'07. Berlin, Heidelberg: Springer-Verlag: <span class="nowrap">220–</span>229. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3540746935" title="Special:BookSources/978-3540746935"><bdi>978-3540746935</bdi></a><span class="reference-accessdate">. Retrieved <span class="nowrap">28 December</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+17th+International+Conference+on+Artificial+Neural+Networks&amp;rft.atitle=An+Application+of+Recurrent+Neural+Networks+to+Discriminative+Keyword+Spotting&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E220-%3C%2Fspan%3E229&amp;rft.date=2007-09-09&amp;rft.isbn=978-3540746935&amp;rft.aulast=Fern%C3%A1ndez&amp;rft.aufirst=S.&amp;rft.au=Graves%2C+A.&amp;rft.au=Schmidhuber%2C+J.&amp;rft_id=http%3A%2F%2Fdl.acm.org%2Fcitation.cfm%3Fid%3D1778066.1778092&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2013-28"><span class="mw-cite-backlink">^ <a href="#cite_ref-graves2013_28-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-graves2013_28-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesMohamedHinton2013" class="citation book cs1">Graves, Alex; Mohamed, Abdel-rahman; Hinton, Geoffrey (2013). "Speech recognition with deep recurrent neural networks". <i>2013 IEEE International Conference on Acoustics, Speech and Signal Processing</i>. 
pp.&#160;<span class="nowrap">6645–</span>6649. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1303.5778">1303.5778</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FICASSP.2013.6638947">10.1109/ICASSP.2013.6638947</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-4799-0356-6" title="Special:BookSources/978-1-4799-0356-6"><bdi>978-1-4799-0356-6</bdi></a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:206741496">206741496</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Speech+recognition+with+deep+recurrent+neural+networks&amp;rft.btitle=2013+IEEE+International+Conference+on+Acoustics%2C+Speech+and+Signal+Processing&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E6645-%3C%2Fspan%3E6649&amp;rft.date=2013&amp;rft_id=info%3Aarxiv%2F1303.5778&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A206741496%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1109%2FICASSP.2013.6638947&amp;rft.isbn=978-1-4799-0356-6&amp;rft.aulast=Graves&amp;rft.aufirst=Alex&amp;rft.au=Mohamed%2C+Abdel-rahman&amp;rft.au=Hinton%2C+Geoffrey&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-29">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKratzertKlotzShalevKlambauer2019" 
class="citation journal cs1">Kratzert, Frederik; Klotz, Daniel; Shalev, Guy; Klambauer, Günter; Hochreiter, Sepp; Nearing, Grey (2019-12-17). <a rel="nofollow" class="external text" href="https://hess.copernicus.org/articles/23/5089/2019/">"Towards learning universal, regional, and local hydrological behaviors via machine learning applied to large-sample datasets"</a>. <i>Hydrology and Earth System Sciences</i>. <b>23</b> (12): <span class="nowrap">5089–</span>5110. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1907.08456">1907.08456</a></span>. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2019HESS...23.5089K">2019HESS...23.5089K</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.5194%2Fhess-23-5089-2019">10.5194/hess-23-5089-2019</a></span>. 
<a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1027-5606">1027-5606</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Hydrology+and+Earth+System+Sciences&amp;rft.atitle=Towards+learning+universal%2C+regional%2C+and+local+hydrological+behaviors+via+machine+learning+applied+to+large-sample+datasets&amp;rft.volume=23&amp;rft.issue=12&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E5089-%3C%2Fspan%3E5110&amp;rft.date=2019-12-17&amp;rft_id=info%3Aarxiv%2F1907.08456&amp;rft.issn=1027-5606&amp;rft_id=info%3Adoi%2F10.5194%2Fhess-23-5089-2019&amp;rft_id=info%3Abibcode%2F2019HESS...23.5089K&amp;rft.aulast=Kratzert&amp;rft.aufirst=Frederik&amp;rft.au=Klotz%2C+Daniel&amp;rft.au=Shalev%2C+Guy&amp;rft.au=Klambauer%2C+G%C3%BCnter&amp;rft.au=Hochreiter%2C+Sepp&amp;rft.au=Nearing%2C+Grey&amp;rft_id=https%3A%2F%2Fhess.copernicus.org%2Farticles%2F23%2F5089%2F2019%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-eck2002-30"><span class="mw-cite-backlink"><b><a href="#cite_ref-eck2002_30-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFEckSchmidhuber2002" class="citation book cs1">Eck, Douglas; Schmidhuber, Jürgen (2002-08-28). "Learning the Long-Term Structure of the Blues". <i>Artificial Neural Networks — ICANN 2002</i>. Lecture Notes in Computer Science. Vol.&#160;2415. Springer, Berlin, Heidelberg. pp.&#160;<span class="nowrap">284–</span>289. 
<a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.116.3620">10.1.1.116.3620</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F3-540-46084-5_47">10.1007/3-540-46084-5_47</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3540460848" title="Special:BookSources/978-3540460848"><bdi>978-3540460848</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Learning+the+Long-Term+Structure+of+the+Blues&amp;rft.btitle=Artificial+Neural+Networks+%E2%80%94+ICANN+2002&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E284-%3C%2Fspan%3E289&amp;rft.pub=Springer%2C+Berlin%2C+Heidelberg&amp;rft.date=2002-08-28&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.116.3620%23id-name%3DCiteSeerX&amp;rft_id=info%3Adoi%2F10.1007%2F3-540-46084-5_47&amp;rft.isbn=978-3540460848&amp;rft.aulast=Eck&amp;rft.aufirst=Douglas&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-gers2002-31"><span class="mw-cite-backlink"><b><a href="#cite_ref-gers2002_31-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchmidhuberGersEckSchmidhuber2002" class="citation journal cs1">Schmidhuber, J.; Gers, F.; Eck, D. (2002). "Learning nonregular languages: A comparison of simple recurrent networks and LSTM". 
<i>Neural Computation</i>. <b>14</b> (9): <span class="nowrap">2039–</span>2041. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.11.7369">10.1.1.11.7369</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1162%2F089976602320263980">10.1162/089976602320263980</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/12184841">12184841</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:30459046">30459046</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Computation&amp;rft.atitle=Learning+nonregular+languages%3A+A+comparison+of+simple+recurrent+networks+and+LSTM&amp;rft.volume=14&amp;rft.issue=9&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E2039-%3C%2Fspan%3E2041&amp;rft.date=2002&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.11.7369%23id-name%3DCiteSeerX&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A30459046%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F12184841&amp;rft_id=info%3Adoi%2F10.1162%2F089976602320263980&amp;rft.aulast=Schmidhuber&amp;rft.aufirst=J.&amp;rft.au=Gers%2C+F.&amp;rft.au=Eck%2C+D.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-perez2003-32"><span class="mw-cite-backlink"><b><a 
href="#cite_ref-perez2003_32-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFPerez-OrtizGersEckSchmidhuber2003" class="citation journal cs1">Perez-Ortiz, J. A.; Gers, F. A.; Eck, D.; Schmidhuber, J. (2003). "Kalman filters improve LSTM network performance in problems unsolvable by traditional recurrent nets". <i>Neural Networks</i>. <b>16</b> (2): <span class="nowrap">241–</span>250. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.381.1992">10.1.1.381.1992</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fs0893-6080%2802%2900219-8">10.1016/s0893-6080(02)00219-8</a>. 
<a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/12628609">12628609</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Networks&amp;rft.atitle=Kalman+filters+improve+LSTM+network+performance+in+problems+unsolvable+by+traditional+recurrent+nets&amp;rft.volume=16&amp;rft.issue=2&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E241-%3C%2Fspan%3E250&amp;rft.date=2003&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.381.1992%23id-name%3DCiteSeerX&amp;rft_id=info%3Apmid%2F12628609&amp;rft_id=info%3Adoi%2F10.1016%2Fs0893-6080%2802%2900219-8&amp;rft.aulast=Perez-Ortiz&amp;rft.aufirst=J.+A.&amp;rft.au=Gers%2C+F.+A.&amp;rft.au=Eck%2C+D.&amp;rft.au=Schmidhuber%2C+J.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2009nips-33"><span class="mw-cite-backlink"><b><a href="#cite_ref-graves2009nips_33-0">^</a></b></span> <span class="reference-text">A. Graves, J. Schmidhuber. Offline Handwriting Recognition with Multidimensional Recurrent Neural Networks. Advances in Neural Information Processing Systems 22, NIPS'22, pp 545–552, Vancouver, MIT Press, 2009.</span> </li> <li id="cite_note-34"><span class="mw-cite-backlink"><b><a href="#cite_ref-34">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesFernándezLiwickiBunke2007" class="citation journal cs1">Graves, A.; Fernández, S.; Liwicki, M.; Bunke, H.; Schmidhuber, J. (3 December 2007). <a rel="nofollow" class="external text" href="http://dl.acm.org/citation.cfm?id=2981562.2981635">"Unconstrained Online Handwriting Recognition with Recurrent Neural Networks"</a>. 
<i>Proceedings of the 20th International Conference on Neural Information Processing Systems</i>. NIPS'07. USA: Curran Associates Inc.: <span class="nowrap">577–</span>584. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/9781605603520" title="Special:BookSources/9781605603520"><bdi>9781605603520</bdi></a><span class="reference-accessdate">. Retrieved <span class="nowrap">28 December</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+20th+International+Conference+on+Neural+Information+Processing+Systems&amp;rft.atitle=Unconstrained+Online+Handwriting+Recognition+with+Recurrent+Neural+Networks&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E577-%3C%2Fspan%3E584&amp;rft.date=2007-12-03&amp;rft.isbn=9781605603520&amp;rft.aulast=Graves&amp;rft.aufirst=A.&amp;rft.au=Fern%C3%A1ndez%2C+S.&amp;rft.au=Liwicki%2C+M.&amp;rft.au=Bunke%2C+H.&amp;rft.au=Schmidhuber%2C+J.&amp;rft_id=http%3A%2F%2Fdl.acm.org%2Fcitation.cfm%3Fid%3D2981562.2981635&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-baccouche2011-35"><span class="mw-cite-backlink"><b><a href="#cite_ref-baccouche2011_35-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBaccoucheMamaletWolfGarcia2011" class="citation book cs1">Baccouche, M.; Mamalet, F.; Wolf, C.; Garcia, C.; Baskurt, A. (2011). "Sequential Deep Learning for Human Action Recognition". In Salah, A. A.; Lepri, B. (eds.). <i>2nd International Workshop on Human Behavior Understanding (HBU)</i>. Lecture Notes in Computer Science. Vol.&#160;7065. Amsterdam, Netherlands: Springer. pp.&#160;<span class="nowrap">29–</span>39. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-642-25446-8_4">10.1007/978-3-642-25446-8_4</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3-642-25445-1" title="Special:BookSources/978-3-642-25445-1"><bdi>978-3-642-25445-1</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Sequential+Deep+Learning+for+Human+Action+Recognition&amp;rft.btitle=2nd+International+Workshop+on+Human+Behavior+Understanding+%28HBU%29&amp;rft.place=Amsterdam%2C+Netherlands&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E29-%3C%2Fspan%3E39&amp;rft.pub=Springer&amp;rft.date=2011&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-642-25446-8_4&amp;rft.isbn=978-3-642-25445-1&amp;rft.aulast=Baccouche&amp;rft.aufirst=M.&amp;rft.au=Mamalet%2C+F.&amp;rft.au=Wolf%2C+C.&amp;rft.au=Garcia%2C+C.&amp;rft.au=Baskurt%2C+A.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-huang2018-36"><span class="mw-cite-backlink"><b><a href="#cite_ref-huang2018_36-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHuangZhouZhangLi2018" class="citation arxiv cs1">Huang, Jie; Zhou, Wengang; Zhang, Qilin; Li, Houqiang; Li, Weiping (2018-01-30). "Video-based Sign Language Recognition without Temporal Segmentation". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1801.10111">1801.10111</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CV">cs.CV</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Video-based+Sign+Language+Recognition+without+Temporal+Segmentation&amp;rft.date=2018-01-30&amp;rft_id=info%3Aarxiv%2F1801.10111&amp;rft.aulast=Huang&amp;rft.aufirst=Jie&amp;rft.au=Zhou%2C+Wengang&amp;rft.au=Zhang%2C+Qilin&amp;rft.au=Li%2C+Houqiang&amp;rft.au=Li%2C+Weiping&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-hochreiter2007-37"><span class="mw-cite-backlink">^ <a href="#cite_ref-hochreiter2007_37-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-hochreiter2007_37-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHochreiterHeuselObermayer2007" class="citation journal cs1">Hochreiter, S.; Heusel, M.; Obermayer, K. (2007). <a rel="nofollow" class="external text" href="https://doi.org/10.1093%2Fbioinformatics%2Fbtm247">"Fast model-based protein homology detection without alignment"</a>. <i>Bioinformatics</i>. <b>23</b> (14): <span class="nowrap">1728–</span>1736. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1093%2Fbioinformatics%2Fbtm247">10.1093/bioinformatics/btm247</a></span>. 
<a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/17488755">17488755</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Bioinformatics&amp;rft.atitle=Fast+model-based+protein+homology+detection+without+alignment&amp;rft.volume=23&amp;rft.issue=14&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1728-%3C%2Fspan%3E1736&amp;rft.date=2007&amp;rft_id=info%3Adoi%2F10.1093%2Fbioinformatics%2Fbtm247&amp;rft_id=info%3Apmid%2F17488755&amp;rft.aulast=Hochreiter&amp;rft.aufirst=S.&amp;rft.au=Heusel%2C+M.&amp;rft.au=Obermayer%2C+K.&amp;rft_id=https%3A%2F%2Fdoi.org%2F10.1093%252Fbioinformatics%252Fbtm247&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-thireou2007-38"><span class="mw-cite-backlink"><b><a href="#cite_ref-thireou2007_38-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFThireouReczko2007" class="citation journal cs1">Thireou, T.; Reczko, M. (2007). "Bidirectional Long Short-Term Memory Networks for predicting the subcellular localization of eukaryotic proteins". <i>IEEE/ACM Transactions on Computational Biology and Bioinformatics</i>. <b>4</b> (3): <span class="nowrap">441–</span>446. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2Ftcbb.2007.1015">10.1109/tcbb.2007.1015</a>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/17666763">17666763</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:11787259">11787259</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE%2FACM+Transactions+on+Computational+Biology+and+Bioinformatics&amp;rft.atitle=Bidirectional+Long+Short-Term+Memory+Networks+for+predicting+the+subcellular+localization+of+eukaryotic+proteins&amp;rft.volume=4&amp;rft.issue=3&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E441-%3C%2Fspan%3E446&amp;rft.date=2007&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A11787259%23id-name%3DS2CID&amp;rft_id=info%3Apmid%2F17666763&amp;rft_id=info%3Adoi%2F10.1109%2Ftcbb.2007.1015&amp;rft.aulast=Thireou&amp;rft.aufirst=T.&amp;rft.au=Reczko%2C+M.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-malhotra2015-39"><span class="mw-cite-backlink"><b><a href="#cite_ref-malhotra2015_39-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMalhotraVigShroffAgarwal2015" class="citation journal cs1">Malhotra, Pankaj; Vig, Lovekesh; Shroff, Gautam; Agarwal, Puneet (April 2015). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201030224634/https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf">"Long Short Term Memory Networks for Anomaly Detection in Time Series"</a> <span class="cs1-format">(PDF)</span>. <i>European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning — ESANN 2015</i>. 
Archived from <a rel="nofollow" class="external text" href="https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 2020-10-30<span class="reference-accessdate">. Retrieved <span class="nowrap">2018-02-21</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=European+Symposium+on+Artificial+Neural+Networks%2C+Computational+Intelligence+and+Machine+Learning+%E2%80%94+ESANN+2015&amp;rft.atitle=Long+Short+Term+Memory+Networks+for+Anomaly+Detection+in+Time+Series&amp;rft.date=2015-04&amp;rft.aulast=Malhotra&amp;rft.aufirst=Pankaj&amp;rft.au=Vig%2C+Lovekesh&amp;rft.au=Shroff%2C+Gautam&amp;rft.au=Agarwal%2C+Puneet&amp;rft_id=https%3A%2F%2Fwww.elen.ucl.ac.be%2FProceedings%2Fesann%2Fesannpdf%2Fes2015-56.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-tax2017-40"><span class="mw-cite-backlink"><b><a href="#cite_ref-tax2017_40-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFTaxVerenichLa_RosaDumas2017" class="citation book cs1">Tax, N.; Verenich, I.; La Rosa, M.; Dumas, M. (2017). "Predictive Business Process Monitoring with LSTM Neural Networks". <i>Advanced Information Systems Engineering</i>. Lecture Notes in Computer Science. Vol.&#160;10253. pp.&#160;<span class="nowrap">477–</span>492. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1612.02130">1612.02130</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-319-59536-8_30">10.1007/978-3-319-59536-8_30</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3-319-59535-1" title="Special:BookSources/978-3-319-59535-1"><bdi>978-3-319-59535-1</bdi></a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:2192354">2192354</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Predictive+Business+Process+Monitoring+with+LSTM+Neural+Networks&amp;rft.btitle=Advanced+Information+Systems+Engineering&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E477-%3C%2Fspan%3E492&amp;rft.date=2017&amp;rft_id=info%3Aarxiv%2F1612.02130&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A2192354%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-319-59536-8_30&amp;rft.isbn=978-3-319-59535-1&amp;rft.aulast=Tax&amp;rft.aufirst=N.&amp;rft.au=Verenich%2C+I.&amp;rft.au=La+Rosa%2C+M.&amp;rft.au=Dumas%2C+M.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-choi2016-41"><span class="mw-cite-backlink"><b><a href="#cite_ref-choi2016_41-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFChoiBahadoriSchuetzStewart2016" class="citation journal cs1">Choi, E.; Bahadori, M.T.; Schuetz, E.; Stewart, W.; Sun, J. (2016). <a rel="nofollow" class="external text" href="http://proceedings.mlr.press/v56/Choi16.html">"Doctor AI: Predicting Clinical Events via Recurrent Neural Networks"</a>. 
<i>JMLR Workshop and Conference Proceedings</i>. <b>56</b>: <span class="nowrap">301–</span>318. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1511.05942">1511.05942</a></span>. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2015arXiv151105942C">2015arXiv151105942C</a>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5341604">5341604</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/28286600">28286600</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=JMLR+Workshop+and+Conference+Proceedings&amp;rft.atitle=Doctor+AI%3A+Predicting+Clinical+Events+via+Recurrent+Neural+Networks&amp;rft.volume=56&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E301-%3C%2Fspan%3E318&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1511.05942&amp;rft_id=info%3Apmid%2F28286600&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC5341604%23id-name%3DPMC&amp;rft_id=info%3Abibcode%2F2015arXiv151105942C&amp;rft.aulast=Choi&amp;rft.aufirst=E.&amp;rft.au=Bahadori%2C+M.T.&amp;rft.au=Schuetz%2C+E.&amp;rft.au=Stewart%2C+W.&amp;rft.au=Sun%2C+J.&amp;rft_id=http%3A%2F%2Fproceedings.mlr.press%2Fv56%2FChoi16.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-jia2016-42"><span class="mw-cite-backlink"><b><a 
href="#cite_ref-jia2016_42-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFJiaLiang2016" class="citation arxiv cs1">Jia, Robin; Liang, Percy (2016). "Data Recombination for Neural Semantic Parsing". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1606.03622">1606.03622</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Data+Recombination+for+Neural+Semantic+Parsing&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1606.03622&amp;rft.aulast=Jia&amp;rft.aufirst=Robin&amp;rft.au=Liang%2C+Percy&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-Wang_Duan_Zhang_Niu_p=1657-43"><span class="mw-cite-backlink"><b><a href="#cite_ref-Wang_Duan_Zhang_Niu_p=1657_43-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWangDuanZhangNiu2018" class="citation journal cs1">Wang, Le; Duan, Xuhuan; Zhang, Qilin; Niu, Zhenxing; Hua, Gang; Zheng, Nanning (2018-05-22). <a rel="nofollow" class="external text" href="https://qilin-zhang.github.io/_pages/pdfs/Segment-Tube_Spatio-Temporal_Action_Localization_in_Untrimmed_Videos_with_Per-Frame_Segmentation.pdf">"Segment-Tube: Spatio-Temporal Action Localization in Untrimmed Videos with Per-Frame Segmentation"</a> <span class="cs1-format">(PDF)</span>. <i>Sensors</i>. <b>18</b> (5): 1657. 
<a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2018Senso..18.1657W">2018Senso..18.1657W</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.3390%2Fs18051657">10.3390/s18051657</a></span>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1424-8220">1424-8220</a>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5982167">5982167</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/29789447">29789447</a>.</cite><span 
title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Sensors&amp;rft.atitle=Segment-Tube%3A+Spatio-Temporal+Action+Localization+in+Untrimmed+Videos+with+Per-Frame+Segmentation&amp;rft.volume=18&amp;rft.issue=5&amp;rft.pages=1657&amp;rft.date=2018-05-22&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC5982167%23id-name%3DPMC&amp;rft_id=info%3Abibcode%2F2018Senso..18.1657W&amp;rft_id=info%3Apmid%2F29789447&amp;rft_id=info%3Adoi%2F10.3390%2Fs18051657&amp;rft.issn=1424-8220&amp;rft.aulast=Wang&amp;rft.aufirst=Le&amp;rft.au=Duan%2C+Xuhuan&amp;rft.au=Zhang%2C+Qilin&amp;rft.au=Niu%2C+Zhenxing&amp;rft.au=Hua%2C+Gang&amp;rft.au=Zheng%2C+Nanning&amp;rft_id=https%3A%2F%2Fqilin-zhang.github.io%2F_pages%2Fpdfs%2FSegment-Tube_Spatio-Temporal_Action_Localization_in_Untrimmed_Videos_with_Per-Frame_Segmentation.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-Duan_Wang_Zhai_Zheng_2018_p.-44"><span class="mw-cite-backlink"><b><a href="#cite_ref-Duan_Wang_Zhai_Zheng_2018_p._44-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFDuanWangZhaiZheng2018" class="citation conference cs1">Duan, Xuhuan; Wang, Le; Zhai, Changbo; Zheng, Nanning; Zhang, Qilin; Niu, Zhenxing; Hua, Gang (2018). "Joint Spatio-Temporal Action Localization in Untrimmed Videos with Per-Frame Segmentation". <i>2018 25th IEEE International Conference on Image Processing (ICIP)</i>. 25th IEEE International Conference on Image Processing (ICIP). pp.&#160;<span class="nowrap">918–</span>922. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2Ficip.2018.8451692">10.1109/icip.2018.8451692</a>. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-4799-7061-2" title="Special:BookSources/978-1-4799-7061-2"><bdi>978-1-4799-7061-2</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Joint+Spatio-Temporal+Action+Localization+in+Untrimmed+Videos+with+Per-Frame+Segmentation&amp;rft.btitle=2018+25th+IEEE+International+Conference+on+Image+Processing+%28ICIP%29&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E918-%3C%2Fspan%3E922&amp;rft.pub=25th+IEEE+International+Conference+on+Image+Processing+%28ICIP%29&amp;rft.date=2018&amp;rft_id=info%3Adoi%2F10.1109%2Ficip.2018.8451692&amp;rft.isbn=978-1-4799-7061-2&amp;rft.aulast=Duan&amp;rft.aufirst=Xuhuan&amp;rft.au=Wang%2C+Le&amp;rft.au=Zhai%2C+Changbo&amp;rft.au=Zheng%2C+Nanning&amp;rft.au=Zhang%2C+Qilin&amp;rft.au=Niu%2C+Zhenxing&amp;rft.au=Hua%2C+Gang&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-orsini2019-45"><span class="mw-cite-backlink"><b><a href="#cite_ref-orsini2019_45-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFOrsiniGastaldiMantecchiniRossi2019" class="citation conference cs1">Orsini, F.; Gastaldi, M.; Mantecchini, L.; Rossi, R. (2019). <i>Neural networks trained with WiFi traces to predict airport passenger behavior</i>. 6th International Conference on Models and Technologies for Intelligent Transportation Systems. Krakow: IEEE. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1910.14026">1910.14026</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FMTITS.2019.8883365">10.1109/MTITS.2019.8883365</a>. 8883365.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Neural+networks+trained+with+WiFi+traces+to+predict+airport+passenger+behavior&amp;rft.place=Krakow&amp;rft.pub=IEEE&amp;rft.date=2019&amp;rft_id=info%3Aarxiv%2F1910.14026&amp;rft_id=info%3Adoi%2F10.1109%2FMTITS.2019.8883365&amp;rft.aulast=Orsini&amp;rft.aufirst=F.&amp;rft.au=Gastaldi%2C+M.&amp;rft.au=Mantecchini%2C+L.&amp;rft.au=Rossi%2C+R.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-liu2017-46"><span class="mw-cite-backlink"><b><a href="#cite_ref-liu2017_46-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFZhaoChenWuChen2017" class="citation journal cs1">Zhao, Z.; Chen, W.; Wu, X.; Chen, P.C.Y.; Liu, J. (2017). "LSTM network: A deep learning approach for Short-term traffic forecast". <i>IET Intelligent Transport Systems</i>. <b>11</b> (2): <span class="nowrap">68–</span>75. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1049%2Fiet-its.2016.0208">10.1049/iet-its.2016.0208</a>. 
<a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:114567527">114567527</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IET+Intelligent+Transport+Systems&amp;rft.atitle=LSTM+network%3A+A+deep+learning+approach+for+Short-term+traffic+forecast&amp;rft.volume=11&amp;rft.issue=2&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E68-%3C%2Fspan%3E75&amp;rft.date=2017&amp;rft_id=info%3Adoi%2F10.1049%2Fiet-its.2016.0208&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A114567527%23id-name%3DS2CID&amp;rft.aulast=Zhao&amp;rft.aufirst=Z.&amp;rft.au=Chen%2C+W.&amp;rft.au=Wu%2C+X.&amp;rft.au=Chen%2C+P.C.Y.&amp;rft.au=Liu%2C+J.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-pmid29095571-47"><span class="mw-cite-backlink"><b><a href="#cite_ref-pmid29095571_47-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGupta_A,_Müller_AT,_Huisman_BJH,_Fuchs_JA,_Schneider_P,_Schneider_G2018" class="citation journal cs1">Gupta A, Müller AT, Huisman BJH, Fuchs JA, Schneider P, Schneider G (2018). <a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5836943">"Generative Recurrent Networks for De Novo Drug Design"</a>. <i>Mol Inform</i>. <b>37</b> (<span class="nowrap">1–</span>2). <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1002%2Fminf.201700111">10.1002/minf.201700111</a>. 
<a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5836943">5836943</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/29095571">29095571</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Mol+Inform&amp;rft.atitle=Generative+Recurrent+Networks+for+De+Novo+Drug+Design.&amp;rft.volume=37&amp;rft.issue=%3Cspan+class%3D%22nowrap%22%3E1%E2%80%93%3C%2Fspan%3E2&amp;rft.date=2018&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC5836943%23id-name%3DPMC&amp;rft_id=info%3Apmid%2F29095571&amp;rft_id=info%3Adoi%2F10.1002%2Fminf.201700111&amp;rft.au=Gupta+A%2C+M%C3%BCller+AT%2C+Huisman+BJH%2C+Fuchs+JA%2C+Schneider+P%2C+Schneider+G&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC5836943&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span><span class="cs1-maint citation-comment"><code class="cs1-code">{{<a href="/wiki/Template:Cite_journal" title="Template:Cite journal">cite journal</a>}}</code>: CS1 maint: multiple names: authors list (<a href="/wiki/Category:CS1_maint:_multiple_names:_authors_list" title="Category:CS1 maint: multiple names: authors list">link</a>)</span></span> </li> <li id="cite_note-saiful2020-48"><span class="mw-cite-backlink"><b><a href="#cite_ref-saiful2020_48-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSaiful_IslamHossain2020" class="citation journal cs1">Saiful Islam, Md.; Hossain, Emam (2020-10-26). 
<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.socl.2020.100009">"Foreign Exchange Currency Rate Prediction using a GRU-LSTM Hybrid Network"</a>. <i>Soft Computing Letters</i>. <b>3</b>: 100009. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.socl.2020.100009">10.1016/j.socl.2020.100009</a></span>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/2666-2221">2666-2221</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Soft+Computing+Letters&amp;rft.atitle=Foreign+Exchange+Currency+Rate+Prediction+using+a+GRU-LSTM+Hybrid+Network&amp;rft.volume=3&amp;rft.pages=100009&amp;rft.date=2020-10-26&amp;rft_id=info%3Adoi%2F10.1016%2Fj.socl.2020.100009&amp;rft.issn=2666-2221&amp;rft.aulast=Saiful+Islam&amp;rft.aufirst=Md.&amp;rft.au=Hossain%2C+Emam&amp;rft_id=https%3A%2F%2Fdoi.org%2F10.1016%252Fj.socl.2020.100009&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-renamed_from_2023_on_20240120110022-49"><span class="mw-cite-backlink"><b><a href="#cite_ref-renamed_from_2023_on_20240120110022_49-0">^</a></b></span> <span class="reference-text">Abbey Martin, Andrew J. Hill, Konstantin M. 
Seiler &amp; Mehala Balamurali (2023). "Automatic excavator action recognition and localisation for untrimmed video using hybrid LSTM-Transformer networks". <i>International Journal of Mining, Reclamation and Environment</i>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1080%2F17480930.2023.2290364">10.1080/17480930.2023.2290364</a>.</span> </li> <li id="cite_note-Beau15-50"><span class="mw-cite-backlink"><b><a href="#cite_ref-Beau15_50-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBeaufays2015" class="citation news cs1">Beaufays, Françoise (August 11, 2015). <a rel="nofollow" class="external text" href="http://googleresearch.blogspot.co.at/2015/08/the-neural-networks-behind-google-voice.html">"The neural networks behind Google Voice transcription"</a>. <i>Research Blog</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Research+Blog&amp;rft.atitle=The+neural+networks+behind+Google+Voice+transcription&amp;rft.date=2015-08-11&amp;rft.aulast=Beaufays&amp;rft.aufirst=Fran%C3%A7oise&amp;rft_id=http%3A%2F%2Fgoogleresearch.blogspot.co.at%2F2015%2F08%2Fthe-neural-networks-behind-google-voice.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-GoogleVoiceSearch-51"><span class="mw-cite-backlink"><b><a href="#cite_ref-GoogleVoiceSearch_51-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSakSeniorRaoBeaufays2015" class="citation news cs1">Sak, Haşim; Senior, Andrew; Rao, Kanishka; Beaufays, Françoise; Schalkwyk, Johan (September 24, 2015). 
<a rel="nofollow" class="external text" href="http://googleresearch.blogspot.co.uk/2015/09/google-voice-search-faster-and-more.html">"Google voice search: faster and more accurate"</a>. <i>Research Blog</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Research+Blog&amp;rft.atitle=Google+voice+search%3A+faster+and+more+accurate&amp;rft.date=2015-09-24&amp;rft.aulast=Sak&amp;rft.aufirst=Ha%C5%9Fim&amp;rft.au=Senior%2C+Andrew&amp;rft.au=Rao%2C+Kanishka&amp;rft.au=Beaufays%2C+Fran%C3%A7oise&amp;rft.au=Schalkwyk%2C+Johan&amp;rft_id=http%3A%2F%2Fgoogleresearch.blogspot.co.uk%2F2015%2F09%2Fgoogle-voice-search-faster-and-more.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-googleblog2015-52"><span class="mw-cite-backlink"><b><a href="#cite_ref-googleblog2015_52-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://googleblog.blogspot.com/2015/07/neon-prescription-or-rather-new.html">"Neon prescription... or rather, New transcription for Google Voice"</a>. <i>Official Google Blog</i>. 23 July 2015<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2020-04-25</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Official+Google+Blog&amp;rft.atitle=Neon+prescription...+or+rather%2C+New+transcription+for+Google+Voice&amp;rft.date=2015-07-23&amp;rft_id=https%3A%2F%2Fgoogleblog.blogspot.com%2F2015%2F07%2Fneon-prescription-or-rather-new.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-GoogleAllo-53"><span class="mw-cite-backlink"><b><a href="#cite_ref-GoogleAllo_53-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFKhaitan2016" class="citation news cs1">Khaitan, Pranav (May 18, 2016). <a rel="nofollow" class="external text" href="http://googleresearch.blogspot.co.at/2016/05/chat-smarter-with-allo.html">"Chat Smarter with Allo"</a>. <i>Research Blog</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Research+Blog&amp;rft.atitle=Chat+Smarter+with+Allo&amp;rft.date=2016-05-18&amp;rft.aulast=Khaitan&amp;rft.aufirst=Pranav&amp;rft_id=http%3A%2F%2Fgoogleresearch.blogspot.co.at%2F2016%2F05%2Fchat-smarter-with-allo.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-WiredGoogleTranslate-54"><span class="mw-cite-backlink"><b><a href="#cite_ref-WiredGoogleTranslate_54-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMetz2016" class="citation magazine cs1">Metz, Cade (September 27, 2016). 
<a rel="nofollow" class="external text" href="https://www.wired.com/2016/09/google-claims-ai-breakthrough-machine-translation/">"An Infusion of AI Makes Google Translate More Powerful Than Ever | WIRED"</a>. <i>Wired</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Wired&amp;rft.atitle=An+Infusion+of+AI+Makes+Google+Translate+More+Powerful+Than+Ever+%7C+WIRED&amp;rft.date=2016-09-27&amp;rft.aulast=Metz&amp;rft.aufirst=Cade&amp;rft_id=https%3A%2F%2Fwww.wired.com%2F2016%2F09%2Fgoogle-claims-ai-breakthrough-machine-translation%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-googleblog2016-55"><span class="mw-cite-backlink"><b><a href="#cite_ref-googleblog2016_55-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://ai.googleblog.com/2016/09/a-neural-network-for-machine.html">"A Neural Network for Machine Translation, at Production Scale"</a>. <i>Google AI Blog</i>. 27 September 2016<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2020-04-25</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Google+AI+Blog&amp;rft.atitle=A+Neural+Network+for+Machine+Translation%2C+at+Production+Scale&amp;rft.date=2016-09-27&amp;rft_id=http%3A%2F%2Fai.googleblog.com%2F2016%2F09%2Fa-neural-network-for-machine.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-AppleQuicktype-56"><span class="mw-cite-backlink"><b><a href="#cite_ref-AppleQuicktype_56-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFEfrati2016" class="citation web cs1">Efrati, Amir (June 13, 2016). <a rel="nofollow" class="external text" href="https://www.theinformation.com/apples-machines-can-learn-too">"Apple's Machines Can Learn Too"</a>. <i>The Information</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=The+Information&amp;rft.atitle=Apple%27s+Machines+Can+Learn+Too&amp;rft.date=2016-06-13&amp;rft.aulast=Efrati&amp;rft.aufirst=Amir&amp;rft_id=https%3A%2F%2Fwww.theinformation.com%2Fapples-machines-can-learn-too&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-AppleQuicktype2-57"><span class="mw-cite-backlink"><b><a href="#cite_ref-AppleQuicktype2_57-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFRanger2016" class="citation news cs1">Ranger, Steve (June 14, 2016). 
<a rel="nofollow" class="external text" href="https://www.zdnet.com/article/ai-big-data-and-the-iphone-heres-how-apple-plans-to-protect-your-privacy/">"iPhone, AI and big data: Here's how Apple plans to protect your privacy"</a>. <i>ZDNet</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=ZDNet&amp;rft.atitle=iPhone%2C+AI+and+big+data%3A+Here%27s+how+Apple+plans+to+protect+your+privacy&amp;rft.date=2016-06-14&amp;rft.aulast=Ranger&amp;rft.aufirst=Steve&amp;rft_id=https%3A%2F%2Fwww.zdnet.com%2Farticle%2Fai-big-data-and-the-iphone-heres-how-apple-plans-to-protect-your-privacy%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-58"><span class="mw-cite-backlink"><b><a href="#cite_ref-58">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://machinelearning.apple.com/2018/09/27/can-global-semantic-context-improve-neural-language-models.html">"Can Global Semantic Context Improve Neural Language Models? – Apple"</a>. <i>Apple Machine Learning Journal</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2020-04-30</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Apple+Machine+Learning+Journal&amp;rft.atitle=Can+Global+Semantic+Context+Improve+Neural+Language+Models%3F+%E2%80%93+Apple&amp;rft_id=https%3A%2F%2Fmachinelearning.apple.com%2F2018%2F09%2F27%2Fcan-global-semantic-context-improve-neural-language-models.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-AppleSiri-59"><span class="mw-cite-backlink"><b><a href="#cite_ref-AppleSiri_59-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSmith2016" class="citation web cs1">Smith, Chris (2016-06-13). <a rel="nofollow" class="external text" href="http://bgr.com/2016/06/13/ios-10-siri-third-party-apps/">"iOS 10: Siri now works in third-party apps, comes with extra AI features"</a>. <i>BGR</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=BGR&amp;rft.atitle=iOS+10%3A+Siri+now+works+in+third-party+apps%2C+comes+with+extra+AI+features&amp;rft.date=2016-06-13&amp;rft.aulast=Smith&amp;rft.aufirst=Chris&amp;rft_id=http%3A%2F%2Fbgr.com%2F2016%2F06%2F13%2Fios-10-siri-third-party-apps%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-capes2017-60"><span class="mw-cite-backlink"><b><a href="#cite_ref-capes2017_60-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFCapesColesConkieGolipour2017" class="citation journal cs1">Capes, Tim; Coles, Paul; Conkie, Alistair; Golipour, Ladan; Hadjitarkhani, Abie; Hu, Qiong; Huddleston, Nancy; Hunt, Melvyn; Li, Jiangchuan; Neeracher, Matthias; Prahallad, Kishore (2017-08-20). <a rel="nofollow" class="external text" href="http://www.isca-speech.org/archive/Interspeech_2017/abstracts/1798.html">"Siri On-Device Deep Learning-Guided Unit Selection Text-to-Speech System"</a>. <i>Interspeech 2017</i>. ISCA: <span class="nowrap">4011–</span>4015. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.21437%2FInterspeech.2017-1798">10.21437/Interspeech.2017-1798</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Interspeech+2017&amp;rft.atitle=Siri+On-Device+Deep+Learning-Guided+Unit+Selection+Text-to-Speech+System&amp;rft.pages=4011-4015&amp;rft.date=2017-08-20&amp;rft_id=info%3Adoi%2F10.21437%2FInterspeech.2017-1798&amp;rft.aulast=Capes&amp;rft.aufirst=Tim&amp;rft.au=Coles%2C+Paul&amp;rft.au=Conkie%2C+Alistair&amp;rft.au=Golipour%2C+Ladan&amp;rft.au=Hadjitarkhani%2C+Abie&amp;rft.au=Hu%2C+Qiong&amp;rft.au=Huddleston%2C+Nancy&amp;rft.au=Hunt%2C+Melvyn&amp;rft.au=Li%2C+Jiangchuan&amp;rft.au=Neeracher%2C+Matthias&amp;rft.au=Prahallad%2C+Kishore&amp;rft_id=http%3A%2F%2Fwww.isca-speech.org%2Farchive%2FInterspeech_2017%2Fabstracts%2F1798.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-AmazonAlexa-61"><span class="mw-cite-backlink"><b><a href="#cite_ref-AmazonAlexa_61-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFVogels2016" class="citation web cs1">Vogels, Werner (30 November 2016). <a rel="nofollow" class="external text" href="http://www.allthingsdistributed.com/2016/11/amazon-ai-and-alexa-for-all-aws-apps.html">"Bringing the Magic of Amazon AI and Alexa to Apps on AWS. – All Things Distributed"</a>. <i>www.allthingsdistributed.com</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2017-06-27</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=www.allthingsdistributed.com&amp;rft.atitle=Bringing+the+Magic+of+Amazon+AI+and+Alexa+to+Apps+on+AWS.+%E2%80%93+All+Things+Distributed&amp;rft.date=2016-11-30&amp;rft.aulast=Vogels&amp;rft.aufirst=Werner&amp;rft_id=http%3A%2F%2Fwww.allthingsdistributed.com%2F2016%2F11%2Famazon-ai-and-alexa-for-all-aws-apps.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-62"><span class="mw-cite-backlink"><b><a href="#cite_ref-62">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFXiongWuAllevaDroppo2018" class="citation book cs1">Xiong, W.; Wu, L.; Alleva, F.; Droppo, J.; Huang, X.; Stolcke, A. (April 2018). <a rel="nofollow" class="external text" href="https://ieeexplore.ieee.org/document/8461870">"The Microsoft 2017 Conversational Speech Recognition System"</a>. <i>2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE. pp.&#160;<span class="nowrap">5934–</span>5938. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1708.06073">1708.06073</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FICASSP.2018.8461870">10.1109/ICASSP.2018.8461870</a>. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-5386-4658-8" title="Special:BookSources/978-1-5386-4658-8"><bdi>978-1-5386-4658-8</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=The+Microsoft+2017+Conversational+Speech+Recognition+System&amp;rft.btitle=2018+IEEE+International+Conference+on+Acoustics%2C+Speech+and+Signal+Processing+%28ICASSP%29&amp;rft.pages=5934-5938&amp;rft.pub=IEEE&amp;rft.date=2018-04&amp;rft_id=info%3Aarxiv%2F1708.06073&amp;rft_id=info%3Adoi%2F10.1109%2FICASSP.2018.8461870&amp;rft.isbn=978-1-5386-4658-8&amp;rft.aulast=Xiong&amp;rft.aufirst=W.&amp;rft.au=Wu%2C+L.&amp;rft.au=Alleva%2C+F.&amp;rft.au=Droppo%2C+J.&amp;rft.au=Huang%2C+X.&amp;rft.au=Stolcke%2C+A.&amp;rft_id=https%3A%2F%2Fieeexplore.ieee.org%2Fdocument%2F8461870&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-miraculous2021-63"><span class="mw-cite-backlink">^ <a href="#cite_ref-miraculous2021_63-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-miraculous2021_63-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-miraculous2021_63-2"><sup><i><b>c</b></i></sup></a> <a href="#cite_ref-miraculous2021_63-3"><sup><i><b>d</b></i></sup></a> <a href="#cite_ref-miraculous2021_63-4"><sup><i><b>e</b></i></sup></a> <a href="#cite_ref-miraculous2021_63-5"><sup><i><b>f</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchmidhuber,_Juergen2021" class="citation arxiv cs1">Schmidhuber, Juergen (10 May 2021). "Deep Learning: Our Miraculous Year 1990-1991". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2005.05744">2005.05744</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.NE">cs.NE</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Deep+Learning%3A+Our+Miraculous+Year+1990-1991&amp;rft.date=2021-05-10&amp;rft_id=info%3Aarxiv%2F2005.05744&amp;rft.au=Schmidhuber%2C+Juergen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-mozer1989-64"><span class="mw-cite-backlink"><b><a href="#cite_ref-mozer1989_64-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMozer1989" class="citation journal cs1">Mozer, Mike (1989). "A Focused Backpropagation Algorithm for Temporal Pattern Recognition". <i>Complex Systems</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Complex+Systems&amp;rft.atitle=A+Focused+Backpropagation+Algorithm+for+Temporal+Pattern+Recognition&amp;rft.date=1989&amp;rft.aulast=Mozer&amp;rft.aufirst=Mike&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-DLhistory-65"><span class="mw-cite-backlink"><b><a href="#cite_ref-DLhistory_65-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchmidhuber2022" class="citation arxiv cs1"><a href="/wiki/Juergen_Schmidhuber" class="mw-redirect" title="Juergen Schmidhuber">Schmidhuber, Juergen</a> (2022). "Annotated History of Modern AI and Deep Learning". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2212.11279">2212.11279</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.NE">cs.NE</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Annotated+History+of+Modern+AI+and+Deep+Learning&amp;rft.date=2022&amp;rft_id=info%3Aarxiv%2F2212.11279&amp;rft.aulast=Schmidhuber&amp;rft.aufirst=Juergen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-66"><span class="mw-cite-backlink"><b><a href="#cite_ref-66">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSepp_HochreiterJürgen_Schmidhuber1995" class="citation cs2"><a href="/wiki/Sepp_Hochreiter" title="Sepp Hochreiter">Sepp Hochreiter</a>; <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a> (21 August 1995), <a rel="nofollow" class="external text" href="ftp://ftp.idsia.ch/pub/juergen/fki-207-95.ps.gz"><i>Long Short Term Memory</i></a>, <a href="/wiki/WDQ_(identifier)" class="mw-redirect" title="WDQ (identifier)">Wikidata</a>&#160;<a href="https://www.wikidata.org/wiki/Q98967430" class="extiw" title="d:Q98967430">Q98967430</a></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Long+Short+Term+Memory&amp;rft.date=1995-08-21&amp;rft.au=Sepp+Hochreiter&amp;rft.au=J%C3%BCrgen+Schmidhuber&amp;rft_id=ftp%3A%2F%2Fftp.idsia.ch%2Fpub%2Fjuergen%2Ffki-207-95.ps.gz&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-lstm1999-67"><span 
class="mw-cite-backlink">^ <a href="#cite_ref-lstm1999_67-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-lstm1999_67-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-lstm1999_67-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGersSchmidhuberCummins1999" class="citation book cs1">Gers, Felix; Schmidhuber, Jürgen; Cummins, Fred (1999). "Learning to forget: Continual prediction with LSTM". <i>9th International Conference on Artificial Neural Networks: ICANN '99</i>. Vol.&#160;1999. pp.&#160;<span class="nowrap">850–</span>855. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1049%2Fcp%3A19991218">10.1049/cp:19991218</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/0-85296-721-7" title="Special:BookSources/0-85296-721-7"><bdi>0-85296-721-7</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Learning+to+forget%3A+Continual+prediction+with+LSTM&amp;rft.btitle=9th+International+Conference+on+Artificial+Neural+Networks%3A+ICANN+%2799&amp;rft.pages=850-855&amp;rft.date=1999&amp;rft_id=info%3Adoi%2F10.1049%2Fcp%3A19991218&amp;rft.isbn=0-85296-721-7&amp;rft.aulast=Gers&amp;rft.aufirst=Felix&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rft.au=Cummins%2C+Fred&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-cho2014-68"><span class="mw-cite-backlink"><b><a href="#cite_ref-cho2014_68-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite 
id="CITEREFChovan_MerrienboerGulcehreBahdanau2014" class="citation arxiv cs1">Cho, Kyunghyun; van Merrienboer, Bart; Gulcehre, Caglar; Bahdanau, Dzmitry; Bougares, Fethi; Schwenk, Holger; Bengio, Yoshua (2014). "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1406.1078">1406.1078</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Learning+Phrase+Representations+using+RNN+Encoder-Decoder+for+Statistical+Machine+Translation&amp;rft.date=2014&amp;rft_id=info%3Aarxiv%2F1406.1078&amp;rft.aulast=Cho&amp;rft.aufirst=Kyunghyun&amp;rft.au=van+Merrienboer%2C+Bart&amp;rft.au=Gulcehre%2C+Caglar&amp;rft.au=Bahdanau%2C+Dzmitry&amp;rft.au=Bougares%2C+Fethi&amp;rft.au=Schwenk%2C+Holger&amp;rft.au=Bengio%2C+Yoshua&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-highway2015-69"><span class="mw-cite-backlink"><b><a href="#cite_ref-highway2015_69-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSrivastavaGreffSchmidhuber2015" class="citation arxiv cs1">Srivastava, Rupesh Kumar; Greff, Klaus; Schmidhuber, Jürgen (2 May 2015). "Highway Networks". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1505.00387">1505.00387</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Highway+Networks&amp;rft.date=2015-05-02&amp;rft_id=info%3Aarxiv%2F1505.00387&amp;rft.aulast=Srivastava&amp;rft.aufirst=Rupesh+Kumar&amp;rft.au=Greff%2C+Klaus&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-highway2015neurips-70"><span class="mw-cite-backlink"><b><a href="#cite_ref-highway2015neurips_70-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSrivastavaGreffSchmidhuber2015b" class="citation journal cs1">Srivastava, Rupesh K; Greff, Klaus; Schmidhuber, Juergen (2015). <a rel="nofollow" class="external text" href="http://papers.nips.cc/paper/5850-training-very-deep-networks">"Training Very Deep Networks"</a>. <i>Advances in Neural Information Processing Systems</i>. <b>28</b>. 
Curran Associates, Inc.: <span class="nowrap">2377–</span>2385.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Advances+in+Neural+Information+Processing+Systems&amp;rft.atitle=Training+Very+Deep+Networks&amp;rft.volume=28&amp;rft.pages=2377-2385&amp;rft.date=2015&amp;rft.aulast=Srivastava&amp;rft.aufirst=Rupesh+K&amp;rft.au=Greff%2C+Klaus&amp;rft.au=Schmidhuber%2C+Juergen&amp;rft_id=http%3A%2F%2Fpapers.nips.cc%2Fpaper%2F5850-training-very-deep-networks&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-mostcited2021-71"><span class="mw-cite-backlink"><b><a href="#cite_ref-mostcited2021_71-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFSchmidhuber2021" class="citation news cs1">Schmidhuber, Jürgen (2021). <a rel="nofollow" class="external text" href="https://people.idsia.ch/~juergen/most-cited-neural-nets.html">"The most cited neural networks all build on work done in my labs"</a>. <i>AI Blog</i>. IDSIA, Switzerland<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2022-04-30</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=AI+Blog&amp;rft.atitle=The+most+cited+neural+networks+all+build+on+work+done+in+my+labs&amp;rft.date=2021&amp;rft.aulast=Schmidhuber&amp;rft.aufirst=J%C3%BCrgen&amp;rft_id=https%3A%2F%2Fpeople.idsia.ch%2F~juergen%2Fmost-cited-neural-nets.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-resnet2015-72"><span class="mw-cite-backlink"><b><a href="#cite_ref-resnet2015_72-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHeZhangRenSun2016" class="citation conference cs1">He, Kaiming; Zhang, Xiangyu; Ren, Shaoqing; Sun, Jian (2016). <a rel="nofollow" class="external text" href="https://ieeexplore.ieee.org/document/7780459"><i>Deep Residual Learning for Image Recognition</i></a>. <i>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</i>. Las Vegas, NV, USA: IEEE. pp.&#160;<span class="nowrap">770–</span>778. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1512.03385">1512.03385</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FCVPR.2016.90">10.1109/CVPR.2016.90</a>. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-4673-8851-1" title="Special:BookSources/978-1-4673-8851-1"><bdi>978-1-4673-8851-1</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=conference&amp;rft.jtitle=2016+IEEE+Conference+on+Computer+Vision+and+Pattern+Recognition+%28CVPR%29&amp;rft.atitle=Deep+Residual+Learning+for+Image+Recognition&amp;rft.pages=770-778&amp;rft.date=2016&amp;rft_id=info%3Aarxiv%2F1512.03385&amp;rft_id=info%3Adoi%2F10.1109%2FCVPR.2016.90&amp;rft.isbn=978-1-4673-8851-1&amp;rft.aulast=He&amp;rft.aufirst=Kaiming&amp;rft.au=Zhang%2C+Xiangyu&amp;rft.au=Ren%2C+Shaoqing&amp;rft.au=Sun%2C+Jian&amp;rft_id=https%3A%2F%2Fieeexplore.ieee.org%2Fdocument%2F7780459&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-73"><span class="mw-cite-backlink"><b><a href="#cite_ref-73">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBeckPöppelSpanringAuer2024" class="citation arxiv cs1">Beck, Maximilian; Pöppel, Korbinian; Spanring, Markus; Auer, Andreas; Prudnikova, Oleksandra; Kopp, Michael; Klambauer, Günter; Brandstetter, Johannes; Hochreiter, Sepp (2024-05-07). "xLSTM: Extended Long Short-Term Memory". 
<a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2405.04517">2405.04517</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.LG">cs.LG</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=xLSTM%3A+Extended+Long+Short-Term+Memory&amp;rft.date=2024-05-07&amp;rft_id=info%3Aarxiv%2F2405.04517&amp;rft.aulast=Beck&amp;rft.aufirst=Maximilian&amp;rft.au=P%C3%B6ppel%2C+Korbinian&amp;rft.au=Spanring%2C+Markus&amp;rft.au=Auer%2C+Andreas&amp;rft.au=Prudnikova%2C+Oleksandra&amp;rft.au=Kopp%2C+Michael&amp;rft.au=Klambauer%2C+G%C3%BCnter&amp;rft.au=Brandstetter%2C+Johannes&amp;rft.au=Hochreiter%2C+Sepp&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-74"><span class="mw-cite-backlink"><b><a href="#cite_ref-74">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation cs2"><a rel="nofollow" class="external text" href="https://github.com/NX-AI/xlstm"><i>NX-AI/xlstm</i></a>, NXAI, 2024-06-04<span class="reference-accessdate">, retrieved <span class="nowrap">2024-06-04</span></span></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=NX-AI%2Fxlstm&amp;rft.pub=NXAI&amp;rft.date=2024-06-04&amp;rft_id=https%3A%2F%2Fgithub.com%2FNX-AI%2Fxlstm&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2004-75"><span class="mw-cite-backlink"><b><a href="#cite_ref-graves2004_75-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" 
href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesBeringerEckSchmidhuber2004" class="citation conference cs1">Graves, Alex; Beringer, Nicole; Eck, Douglas; Schmidhuber, Juergen (2004). <i>Biologically Plausible Speech Recognition with LSTM Neural Nets</i>. Workshop on Biologically Inspired Approaches to Advanced Information Technology, Bio-ADIT 2004, Lausanne, Switzerland. pp.&#160;<span class="nowrap">175–</span>184.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Biologically+Plausible+Speech+Recognition+with+LSTM+Neural+Nets.&amp;rft.pages=175-184&amp;rft.date=2004&amp;rft.aulast=Graves&amp;rft.aufirst=Alex&amp;rft.au=Beringer%2C+Nicole&amp;rft.au=Eck%2C+Douglas&amp;rft.au=Schmidhuber%2C+Juergen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-76"><span class="mw-cite-backlink"><b><a href="#cite_ref-76">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFHochreiterYoungerConwell2001" class="citation book cs1">Hochreiter, S.; Younger, A. S.; Conwell, P. R. (2001). "Learning to Learn Using Gradient Descent". <a rel="nofollow" class="external text" href="http://www.bioinf.jku.at/publications/older/1504.pdf"><i>Artificial Neural Networks — ICANN 2001</i></a> <span class="cs1-format">(PDF)</span>. Lecture Notes in Computer Science. Vol.&#160;2130. pp.&#160;<span class="nowrap">87–</span>94. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.5.323">10.1.1.5.323</a></span>. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F3-540-44668-0_13">10.1007/3-540-44668-0_13</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-3-540-42486-4" title="Special:BookSources/978-3-540-42486-4"><bdi>978-3-540-42486-4</bdi></a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0302-9743">0302-9743</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:52872549">52872549</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Learning+to+Learn+Using+Gradient+Descent&amp;rft.btitle=Artificial+Neural+Networks+%E2%80%94+ICANN+2001&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=87-94&amp;rft.date=2001&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A52872549%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1007%2F3-540-44668-0_13&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.5.323%23id-name%3DCiteSeerX&amp;rft.issn=0302-9743&amp;rft.isbn=978-3-540-42486-4&amp;rft.aulast=Hochreiter&amp;rft.aufirst=S.&amp;rft.au=Younger%2C+A.+S.&amp;rft.au=Conwell%2C+P.+R.&amp;rft_id=http%3A%2F%2Fwww.bioinf.jku.at%2Fpublications%2Folder%2F1504.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-wierstra2007-77"><span class="mw-cite-backlink"><b><a href="#cite_ref-wierstra2007_77-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" 
href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFWierstraFoersterPetersSchmidhuber2005" class="citation journal cs1">Wierstra, Daan; Foerster, Alexander; Peters, Jan; Schmidhuber, Juergen (2005). <a rel="nofollow" class="external text" href="https://people.idsia.ch/~juergen/lstm-policy-gradient-2010.html">"Solving Deep Memory POMDPs with Recurrent Policy Gradients"</a>. <i>International Conference on Artificial Neural Networks ICANN'07</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=International+Conference+on+Artificial+Neural+Networks+ICANN%2707&amp;rft.atitle=Solving+Deep+Memory+POMDPs+with+Recurrent+Policy+Gradients&amp;rft.date=2005&amp;rft.aulast=Wierstra&amp;rft.aufirst=Daan&amp;rft.au=Foerster%2C+Alexander&amp;rft.au=Peters%2C+Jan&amp;rft.au=Schmidhuber%2C+Juergen&amp;rft_id=https%3A%2F%2Fpeople.idsia.ch%2F~juergen%2Flstm-policy-gradient-2010.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-bayer2009-78"><span class="mw-cite-backlink"><b><a href="#cite_ref-bayer2009_78-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBayerWierstraTogeliusSchmidhuber2009" class="citation journal cs1">Bayer, Justin; Wierstra, Daan; Togelius, Julian; Schmidhuber, Juergen (2009). "Evolving memory cell structures for sequence learning". 
<i>International Conference on Artificial Neural Networks ICANN'09, Cyprus</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=International+Conference+on+Artificial+Neural+Networks+ICANN%2709%2C+Cyprus&amp;rft.atitle=Evolving+memory+cell+structures+for+sequence+learning&amp;rft.date=2009&amp;rft.aulast=Bayer&amp;rft.aufirst=Justin&amp;rft.au=Wierstra%2C+Daan&amp;rft.au=Togelius%2C+Julian&amp;rft.au=Schmidhuber%2C+Juergen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-graves2009-79"><span class="mw-cite-backlink"><b><a href="#cite_ref-graves2009_79-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGravesLiwickiFernándezBertolami2009" class="citation journal cs1">Graves, A.; Liwicki, M.; Fernández, S.; Bertolami, R.; Bunke, H.; Schmidhuber, J. (May 2009). "A Novel Connectionist System for Unconstrained Handwriting Recognition". <i>IEEE Transactions on Pattern Analysis and Machine Intelligence</i>. <b>31</b> (5): <span class="nowrap">855–</span>868. <a href="/wiki/CiteSeerX_(identifier)" class="mw-redirect" title="CiteSeerX (identifier)">CiteSeerX</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.139.4502">10.1.1.139.4502</a></span>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2Ftpami.2008.137">10.1109/tpami.2008.137</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0162-8828">0162-8828</a>. 
<a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/19299860">19299860</a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:14635907">14635907</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=IEEE+Transactions+on+Pattern+Analysis+and+Machine+Intelligence&amp;rft.atitle=A+Novel+Connectionist+System+for+Unconstrained+Handwriting+Recognition&amp;rft.volume=31&amp;rft.issue=5&amp;rft.pages=855-868&amp;rft.date=2009-05&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A14635907%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1109%2Ftpami.2008.137&amp;rft_id=https%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fsummary%3Fdoi%3D10.1.1.139.4502%23id-name%3DCiteSeerX&amp;rft.issn=0162-8828&amp;rft_id=info%3Apmid%2F19299860&amp;rft.aulast=Graves&amp;rft.aufirst=A.&amp;rft.au=Liwicki%2C+M.&amp;rft.au=Fern%C3%A1ndez%2C+S.&amp;rft.au=Bertolami%2C+R.&amp;rft.au=Bunke%2C+H.&amp;rft.au=Schmidhuber%2C+J.&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-maergner2009-80"><span class="mw-cite-backlink"><b><a href="#cite_ref-maergner2009_80-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMärgnerAbed2009" class="citation book cs1">Märgner, Volker; Abed, Haikal El (July 2009). "ICDAR 2009 Arabic Handwriting Recognition Competition". <i>2009 10th International Conference on Document Analysis and Recognition</i>. pp.&#160;<span class="nowrap">1383–</span>1387. 
<a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FICDAR.2009.256">10.1109/ICDAR.2009.256</a>. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-4244-4500-4" title="Special:BookSources/978-1-4244-4500-4"><bdi>978-1-4244-4500-4</bdi></a>. <a href="/wiki/S2CID_(identifier)" class="mw-redirect" title="S2CID (identifier)">S2CID</a>&#160;<a rel="nofollow" class="external text" href="https://api.semanticscholar.org/CorpusID:52851337">52851337</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=ICDAR+2009+Arabic+Handwriting+Recognition+Competition&amp;rft.btitle=2009+10th+International+Conference+on+Document+Analysis+and+Recognition&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E1383-%3C%2Fspan%3E1387&amp;rft.date=2009-07&amp;rft_id=https%3A%2F%2Fapi.semanticscholar.org%2FCorpusID%3A52851337%23id-name%3DS2CID&amp;rft_id=info%3Adoi%2F10.1109%2FICDAR.2009.256&amp;rft.isbn=978-1-4244-4500-4&amp;rft.aulast=M%C3%A4rgner&amp;rft.aufirst=Volker&amp;rft.au=Abed%2C+Haikal+El&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-81"><span class="mw-cite-backlink"><b><a href="#cite_ref-81">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://biometrics.cse.msu.edu/Publications/MachineLearning/Baytasetal_PatientSubtypingViaTimeAwareLSTMNetworks.pdf">"Patient Subtyping via Time-Aware LSTM Networks"</a> <span class="cs1-format">(PDF)</span>. <i>msu.edu</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">21 Nov</span> 2018</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=msu.edu&amp;rft.atitle=Patient+Subtyping+via+Time-Aware+LSTM+Networks&amp;rft_id=http%3A%2F%2Fbiometrics.cse.msu.edu%2FPublications%2FMachineLearning%2FBaytasetal_PatientSubtypingViaTimeAwareLSTMNetworks.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-82"><span class="mw-cite-backlink"><b><a href="#cite_ref-82">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://www.kdd.org/kdd2017/papers/view/patient-subtyping-via-time-aware-lstm-networks">"Patient Subtyping via Time-Aware LSTM Networks"</a>. <i>Kdd.org</i><span class="reference-accessdate">. Retrieved <span class="nowrap">24 May</span> 2018</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Kdd.org&amp;rft.atitle=Patient+Subtyping+via+Time-Aware+LSTM+Networks&amp;rft_id=http%3A%2F%2Fwww.kdd.org%2Fkdd2017%2Fpapers%2Fview%2Fpatient-subtyping-via-time-aware-lstm-networks&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> <li id="cite_note-83"><span class="mw-cite-backlink"><b><a href="#cite_ref-83">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://www.kdd.org">"SIGKDD"</a>. <i>Kdd.org</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">24 May</span> 2018</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Kdd.org&amp;rft.atitle=SIGKDD&amp;rft_id=http%3A%2F%2Fwww.kdd.org&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=18" title="Edit section: Further reading"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFMonnerReggia2010" class="citation journal cs1">Monner, Derek D.; Reggia, James A. (2010). <a rel="nofollow" class="external text" href="http://www.cs.umd.edu/~dmonner/papers/nn2012.pdf">"A generalized LSTM-like training algorithm for second-order recurrent neural networks"</a> <span class="cs1-format">(PDF)</span>. <i>Neural Networks</i>. <b>25</b> (1): <span class="nowrap">70–</span>83. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1016%2Fj.neunet.2011.07.003">10.1016/j.neunet.2011.07.003</a>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3217173">3217173</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/21803542">21803542</a>. 
<q>High-performing extension of LSTM that has been simplified to a single node type and can train arbitrary architectures</q></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Neural+Networks&amp;rft.atitle=A+generalized+LSTM-like+training+algorithm+for+second-order+recurrent+neural+networks&amp;rft.volume=25&amp;rft.issue=1&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E70-%3C%2Fspan%3E83&amp;rft.date=2010&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC3217173%23id-name%3DPMC&amp;rft_id=info%3Apmid%2F21803542&amp;rft_id=info%3Adoi%2F10.1016%2Fj.neunet.2011.07.003&amp;rft.aulast=Monner&amp;rft.aufirst=Derek+D.&amp;rft.au=Reggia%2C+James+A.&amp;rft_id=http%3A%2F%2Fwww.cs.umd.edu%2F~dmonner%2Fpapers%2Fnn2012.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></li> <li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGersSchraudolphSchmidhuber2002" class="citation journal cs1">Gers, Felix A.; Schraudolph, Nicol N.; Schmidhuber, Jürgen (Aug 2002). <a rel="nofollow" class="external text" href="http://www.jmlr.org/papers/volume3/gers02a/gers02a.pdf">"Learning precise timing with LSTM recurrent networks"</a> <span class="cs1-format">(PDF)</span>. <i>Journal of Machine Learning Research</i>. 
<b>3</b>: <span class="nowrap">115–</span>143.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Journal+of+Machine+Learning+Research&amp;rft.atitle=Learning+precise+timing+with+LSTM+recurrent+networks&amp;rft.volume=3&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E115-%3C%2Fspan%3E143&amp;rft.date=2002-08&amp;rft.aulast=Gers&amp;rft.aufirst=Felix+A.&amp;rft.au=Schraudolph%2C+Nicol+N.&amp;rft.au=Schmidhuber%2C+J%C3%BCrgen&amp;rft_id=http%3A%2F%2Fwww.jmlr.org%2Fpapers%2Fvolume3%2Fgers02a%2Fgers02a.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></li> <li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFGers2001" class="citation web cs1">Gers, Felix (2001). <a rel="nofollow" class="external text" href="http://www.felixgers.de/papers/phd.pdf">"Long Short-Term Memory in Recurrent Neural Networks"</a> <span class="cs1-format">(PDF)</span>. <i>PhD thesis</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=PhD+thesis&amp;rft.atitle=Long+Short-Term+Memory+in+Recurrent+Neural+Networks&amp;rft.date=2001&amp;rft.aulast=Gers&amp;rft.aufirst=Felix&amp;rft_id=http%3A%2F%2Fwww.felixgers.de%2Fpapers%2Fphd.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></li> <li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFAbidogun2005" class="citation thesis cs1">Abidogun, Olusola Adeniyi (2005). <a rel="nofollow" class="external text" href="http://etd.uwc.ac.za/xmlui/handle/11394/249"><i>Data Mining, Fraud Detection and Mobile Telecommunications: Call Pattern Analysis with Unsupervised Neural Networks</i></a>. <i>Master's Thesis</i> (Thesis). University of the Western Cape. 
<a href="/wiki/Hdl_(identifier)" class="mw-redirect" title="Hdl (identifier)">hdl</a>:<a rel="nofollow" class="external text" href="https://hdl.handle.net/11394%2F249">11394/249</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20120522234026/http://etd.uwc.ac.za/usrfiles/modules/etd/docs/etd_init_3937_1174040706.pdf">Archived</a> <span class="cs1-format">(PDF)</span> from the original on May 22, 2012.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adissertation&amp;rft.title=Data+Mining%2C+Fraud+Detection+and+Mobile+Telecommunications%3A+Call+Pattern+Analysis+with+Unsupervised+Neural+Networks&amp;rft.inst=University+of+the+Western+Cape&amp;rft.date=2005&amp;rft_id=info%3Ahdl%2F11394%2F249&amp;rft.aulast=Abidogun&amp;rft.aufirst=Olusola+Adeniyi&amp;rft_id=http%3A%2F%2Fetd.uwc.ac.za%2Fxmlui%2Fhandle%2F11394%2F249&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span> <ul><li><a rel="nofollow" class="external text" href="http://etd.uwc.ac.za/bitstream/handle/11394/249/Abidogun_MSC_2005.pdf">original</a> with two chapters devoted to explaining recurrent neural networks, especially LSTM.</li></ul></li></ul> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Long_short-term_memory&amp;action=edit&amp;section=19" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a rel="nofollow" class="external text" href="http://www.idsia.ch/~juergen/rnn.html">Recurrent Neural Networks</a> with over 30 LSTM papers by <a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a>'s group at <a href="/wiki/IDSIA" class="mw-redirect" title="IDSIA">IDSIA</a></li> <li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite 
id="CITEREFZhangLiptonLiSmola2024" class="citation book cs1">Zhang, Aston; Lipton, Zachary; Li, Mu; Smola, Alexander J. (2024). <a rel="nofollow" class="external text" href="https://d2l.ai/chapter_recurrent-modern/lstm.html">"10.1. Long Short-Term Memory (LSTM)"</a>. <i>Dive into deep learning</i>. Cambridge New York Port Melbourne New Delhi Singapore: Cambridge University Press. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-009-38943-3" title="Special:BookSources/978-1-009-38943-3"><bdi>978-1-009-38943-3</bdi></a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=10.1.+Long+Short-Term+Memory+%28LSTM%29&amp;rft.btitle=Dive+into+deep+learning&amp;rft.place=Cambridge+New+York+Port+Melbourne+New+Delhi+Singapore&amp;rft.pub=Cambridge+University+Press&amp;rft.date=2024&amp;rft.isbn=978-1-009-38943-3&amp;rft.aulast=Zhang&amp;rft.aufirst=Aston&amp;rft.au=Lipton%2C+Zachary&amp;rft.au=Li%2C+Mu&amp;rft.au=Smola%2C+Alexander+J.&amp;rft_id=https%3A%2F%2Fd2l.ai%2Fchapter_recurrent-modern%2Flstm.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALong+short-term+memory" class="Z3988"></span></li></ul> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output 
.navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Artificial_intelligence_(AI)752" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar 
plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Artificial_intelligence_navbox" title="Template:Artificial intelligence navbox"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Artificial_intelligence_navbox" title="Template talk:Artificial intelligence navbox"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Artificial_intelligence_navbox" title="Special:EditPage/Template:Artificial intelligence navbox"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Artificial_intelligence_(AI)752" style="font-size:114%;margin:0 4em"><a href="/wiki/Artificial_intelligence" title="Artificial intelligence">Artificial intelligence</a> (AI)</div></th></tr><tr><td class="navbox-abovebelow" colspan="2"><div><a href="/wiki/History_of_artificial_intelligence" title="History of artificial intelligence">History</a> (<a href="/wiki/Timeline_of_artificial_intelligence" title="Timeline of artificial intelligence">timeline</a>)</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Concepts</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parameter" title="Parameter">Parameter</a> <ul><li><a href="/wiki/Hyperparameter_(machine_learning)" title="Hyperparameter (machine learning)">Hyperparameter</a></li></ul></li> <li><a href="/wiki/Loss_functions_for_classification" title="Loss functions for classification">Loss functions</a></li> <li><a href="/wiki/Regression_analysis" title="Regression analysis">Regression</a> <ul><li><a href="/wiki/Bias%E2%80%93variance_tradeoff" title="Bias–variance tradeoff">Bias–variance tradeoff</a></li> <li><a href="/wiki/Double_descent" title="Double descent">Double descent</a></li> <li><a href="/wiki/Overfitting" title="Overfitting">Overfitting</a></li></ul></li> <li><a 
href="/wiki/Cluster_analysis" title="Cluster analysis">Clustering</a></li> <li><a href="/wiki/Gradient_descent" title="Gradient descent">Gradient descent</a> <ul><li><a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">SGD</a></li> <li><a href="/wiki/Quasi-Newton_method" title="Quasi-Newton method">Quasi-Newton method</a></li> <li><a href="/wiki/Conjugate_gradient_method" title="Conjugate gradient method">Conjugate gradient method</a></li></ul></li> <li><a href="/wiki/Backpropagation" title="Backpropagation">Backpropagation</a></li> <li><a href="/wiki/Attention_(machine_learning)" title="Attention (machine learning)">Attention</a></li> <li><a href="/wiki/Convolution" title="Convolution">Convolution</a></li> <li><a href="/wiki/Normalization_(machine_learning)" title="Normalization (machine learning)">Normalization</a> <ul><li><a href="/wiki/Batch_normalization" title="Batch normalization">Batchnorm</a></li></ul></li> <li><a href="/wiki/Activation_function" title="Activation function">Activation</a> <ul><li><a href="/wiki/Softmax_function" title="Softmax function">Softmax</a></li> <li><a href="/wiki/Sigmoid_function" title="Sigmoid function">Sigmoid</a></li> <li><a href="/wiki/Rectifier_(neural_networks)" title="Rectifier (neural networks)">Rectifier</a></li></ul></li> <li><a href="/wiki/Gating_mechanism" title="Gating mechanism">Gating</a></li> <li><a href="/wiki/Weight_initialization" title="Weight initialization">Weight initialization</a></li> <li><a href="/wiki/Regularization_(mathematics)" title="Regularization (mathematics)">Regularization</a></li> <li><a href="/wiki/Training,_validation,_and_test_data_sets" title="Training, validation, and test data sets">Datasets</a> <ul><li><a href="/wiki/Data_augmentation" title="Data augmentation">Augmentation</a></li></ul></li> <li><a href="/wiki/Prompt_engineering" title="Prompt engineering">Prompt engineering</a></li> <li><a href="/wiki/Reinforcement_learning" title="Reinforcement 
learning">Reinforcement learning</a> <ul><li><a href="/wiki/Q-learning" title="Q-learning">Q-learning</a></li> <li><a href="/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action" title="State–action–reward–state–action">SARSA</a></li> <li><a href="/wiki/Imitation_learning" title="Imitation learning">Imitation</a></li> <li><a href="/wiki/Policy_gradient_method" title="Policy gradient method">Policy gradient</a></li></ul></li> <li><a href="/wiki/Diffusion_process" title="Diffusion process">Diffusion</a></li> <li><a href="/wiki/Latent_diffusion_model" title="Latent diffusion model">Latent diffusion model</a></li> <li><a href="/wiki/Autoregressive_model" title="Autoregressive model">Autoregression</a></li> <li><a href="/wiki/Adversarial_machine_learning" title="Adversarial machine learning">Adversary</a></li> <li><a href="/wiki/Retrieval-augmented_generation" title="Retrieval-augmented generation">RAG</a></li> <li><a href="/wiki/Uncanny_valley" title="Uncanny valley">Uncanny valley</a></li> <li><a href="/wiki/Reinforcement_learning_from_human_feedback" title="Reinforcement learning from human feedback">RLHF</a></li> <li><a href="/wiki/Self-supervised_learning" title="Self-supervised learning">Self-supervised learning</a></li> <li><a href="/wiki/Recursive_self-improvement" title="Recursive self-improvement">Recursive self-improvement</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Applications</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> <ul><li><a href="/wiki/Prompt_engineering#In-context_learning" title="Prompt engineering">In-context 
learning</a></li></ul></li> <li><a href="/wiki/Neural_network_(machine_learning)" title="Neural network (machine learning)">Artificial neural network</a> <ul><li><a href="/wiki/Deep_learning" title="Deep learning">Deep learning</a></li></ul></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> <ul><li><a href="/wiki/Large_language_model" title="Large language model">Large language model</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">NMT</a></li></ul></li> <li><a href="/wiki/Artificial_general_intelligence" title="Artificial general intelligence">Artificial general intelligence</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Implementations</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Audio–visual</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlexNet" title="AlexNet">AlexNet</a></li> <li><a href="/wiki/WaveNet" title="WaveNet">WaveNet</a></li> <li><a href="/wiki/Human_image_synthesis" title="Human image synthesis">Human image synthesis</a></li> <li><a href="/wiki/Handwriting_recognition" title="Handwriting recognition">HWR</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a></li> <li><a href="/wiki/Deep_learning_speech_synthesis" title="Deep learning speech synthesis">Speech synthesis</a> <ul><li><a href="/wiki/15.ai" title="15.ai">15.ai</a></li> <li><a href="/wiki/ElevenLabs" title="ElevenLabs">ElevenLabs</a></li></ul></li> <li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a> <ul><li><a href="/wiki/Whisper_(speech_recognition_system)" title="Whisper (speech 
recognition system)">Whisper</a></li></ul></li> <li><a href="/wiki/Facial_recognition_system" title="Facial recognition system">Facial recognition</a></li> <li><a href="/wiki/AlphaFold" title="AlphaFold">AlphaFold</a></li> <li><a href="/wiki/Text-to-image_model" title="Text-to-image model">Text-to-image models</a> <ul><li><a href="/wiki/Aurora_(text-to-image_model)" class="mw-redirect" title="Aurora (text-to-image model)">Aurora</a></li> <li><a href="/wiki/DALL-E" title="DALL-E">DALL-E</a></li> <li><a href="/wiki/Adobe_Firefly" title="Adobe Firefly">Firefly</a></li> <li><a href="/wiki/Flux_(text-to-image_model)" title="Flux (text-to-image model)">Flux</a></li> <li><a href="/wiki/Ideogram_(text-to-image_model)" title="Ideogram (text-to-image model)">Ideogram</a></li> <li><a href="/wiki/Google_Brain#Text-to-image_model" title="Google Brain">Imagen</a></li> <li><a href="/wiki/Midjourney" title="Midjourney">Midjourney</a></li> <li><a href="/wiki/Stable_Diffusion" title="Stable Diffusion">Stable Diffusion</a></li></ul></li> <li><a href="/wiki/Text-to-video_model" title="Text-to-video model">Text-to-video models</a> <ul><li><a href="/wiki/Dream_Machine_(text-to-video_model)" title="Dream Machine (text-to-video model)">Dream Machine</a></li> <li><a href="/wiki/Runway_(company)#Gen-3_Alpha" title="Runway (company)">Gen-3 Alpha</a></li> <li><a href="/wiki/MiniMax_(company)#Hailuo_AI" title="MiniMax (company)">Hailuo AI</a></li> <li><a href="/wiki/Kling_(text-to-video_model)" class="mw-redirect" title="Kling (text-to-video model)">Kling</a></li> <li><a href="/wiki/Sora_(text-to-video_model)" title="Sora (text-to-video model)">Sora</a></li> <li><a href="/wiki/Google_DeepMind#Video_model" title="Google DeepMind">Veo</a></li></ul></li> <li><a href="/wiki/Music_and_artificial_intelligence" title="Music and artificial intelligence">Music generation</a> <ul><li><a href="/wiki/Suno_AI" title="Suno AI">Suno AI</a></li> <li><a href="/wiki/Udio" 
title="Udio">Udio</a></li></ul></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Text</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/T5_(language_model)" title="T5 (language model)">T5</a></li> <li><a href="/wiki/Llama_(language_model)" title="Llama (language model)">Llama</a></li> <li><a href="/wiki/Chinchilla_(language_model)" title="Chinchilla (language model)">Chinchilla AI</a></li> <li><a href="/wiki/PaLM" title="PaLM">PaLM</a></li> <li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPT</a> <ul><li><a href="/wiki/GPT-1" title="GPT-1">1</a></li> <li><a href="/wiki/GPT-2" title="GPT-2">2</a></li> <li><a href="/wiki/GPT-3" title="GPT-3">3</a></li> <li><a href="/wiki/GPT-J" title="GPT-J">J</a></li> <li><a href="/wiki/ChatGPT" title="ChatGPT">ChatGPT</a></li> <li><a href="/wiki/GPT-4" title="GPT-4">4</a></li> <li><a href="/wiki/GPT-4o" title="GPT-4o">4o</a></li> <li><a href="/wiki/GPT-4.5" title="GPT-4.5">4.5</a></li> <li><a href="/wiki/OpenAI_o1" title="OpenAI o1">o1</a></li> <li><a href="/wiki/OpenAI_o3" title="OpenAI o3">o3</a></li></ul></li> <li><a href="/wiki/Claude_(language_model)" title="Claude (language model)">Claude</a></li> <li><a href="/wiki/Gemini_(language_model)" title="Gemini (language model)">Gemini</a> <ul><li><a href="/wiki/Gemini_(chatbot)" title="Gemini (chatbot)">chatbot</a></li></ul></li> <li><a href="/wiki/Grok_(chatbot)" title="Grok (chatbot)">Grok</a></li> <li><a href="/wiki/LaMDA" title="LaMDA">LaMDA</a></li> <li><a href="/wiki/BLOOM_(language_model)" title="BLOOM (language model)">BLOOM</a></li> 
<li><a href="/wiki/Project_Debater" title="Project Debater">Project Debater</a></li> <li><a href="/wiki/IBM_Watson" title="IBM Watson">IBM Watson</a></li> <li><a href="/wiki/IBM_Watsonx" title="IBM Watsonx">IBM Watsonx</a></li> <li><a href="/wiki/IBM_Granite" title="IBM Granite">Granite</a></li> <li><a href="/wiki/Huawei_PanGu" title="Huawei PanGu">PanGu-Σ</a></li> <li><a href="/wiki/DeepSeek_(chatbot)" title="DeepSeek (chatbot)">DeepSeek</a></li> <li><a href="/wiki/Qwen" title="Qwen">Qwen</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Decisional</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AlphaGo" title="AlphaGo">AlphaGo</a></li> <li><a href="/wiki/AlphaZero" title="AlphaZero">AlphaZero</a></li> <li><a href="/wiki/OpenAI_Five" title="OpenAI Five">OpenAI Five</a></li> <li><a href="/wiki/Self-driving_car" title="Self-driving car">Self-driving car</a></li> <li><a href="/wiki/MuZero" title="MuZero">MuZero</a></li> <li><a href="/wiki/Action_selection" title="Action selection">Action selection</a> <ul><li><a href="/wiki/AutoGPT" title="AutoGPT">AutoGPT</a></li></ul></li> <li><a href="/wiki/Robot_control" title="Robot control">Robot control</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">People</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a></li> <li><a href="/wiki/Warren_Sturgis_McCulloch" title="Warren Sturgis McCulloch">Warren Sturgis McCulloch</a></li> <li><a href="/wiki/Walter_Pitts" title="Walter Pitts">Walter Pitts</a></li> <li><a href="/wiki/John_von_Neumann" title="John von Neumann">John von Neumann</a></li> <li><a href="/wiki/Claude_Shannon" title="Claude Shannon">Claude Shannon</a></li> <li><a 
href="/wiki/Marvin_Minsky" title="Marvin Minsky">Marvin Minsky</a></li> <li><a href="/wiki/John_McCarthy_(computer_scientist)" title="John McCarthy (computer scientist)">John McCarthy</a></li> <li><a href="/wiki/Nathaniel_Rochester_(computer_scientist)" title="Nathaniel Rochester (computer scientist)">Nathaniel Rochester</a></li> <li><a href="/wiki/Allen_Newell" title="Allen Newell">Allen Newell</a></li> <li><a href="/wiki/Cliff_Shaw" title="Cliff Shaw">Cliff Shaw</a></li> <li><a href="/wiki/Herbert_A._Simon" title="Herbert A. Simon">Herbert A. Simon</a></li> <li><a href="/wiki/Oliver_Selfridge" title="Oliver Selfridge">Oliver Selfridge</a></li> <li><a href="/wiki/Frank_Rosenblatt" title="Frank Rosenblatt">Frank Rosenblatt</a></li> <li><a href="/wiki/Bernard_Widrow" title="Bernard Widrow">Bernard Widrow</a></li> <li><a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a></li> <li><a href="/wiki/Seymour_Papert" title="Seymour Papert">Seymour Papert</a></li> <li><a href="/wiki/Seppo_Linnainmaa" title="Seppo Linnainmaa">Seppo Linnainmaa</a></li> <li><a href="/wiki/Paul_Werbos" title="Paul Werbos">Paul Werbos</a></li> <li><a href="/wiki/J%C3%BCrgen_Schmidhuber" title="Jürgen Schmidhuber">Jürgen Schmidhuber</a></li> <li><a href="/wiki/Yann_LeCun" title="Yann LeCun">Yann LeCun</a></li> <li><a href="/wiki/Geoffrey_Hinton" title="Geoffrey Hinton">Geoffrey Hinton</a></li> <li><a href="/wiki/John_Hopfield" title="John Hopfield">John Hopfield</a></li> <li><a href="/wiki/Yoshua_Bengio" title="Yoshua Bengio">Yoshua Bengio</a></li> <li><a href="/wiki/Lotfi_A._Zadeh" title="Lotfi A. Zadeh">Lotfi A. 
Zadeh</a></li> <li><a href="/wiki/Stephen_Grossberg" title="Stephen Grossberg">Stephen Grossberg</a></li> <li><a href="/wiki/Alex_Graves_(computer_scientist)" title="Alex Graves (computer scientist)">Alex Graves</a></li> <li><a href="/wiki/Andrew_Ng" title="Andrew Ng">Andrew Ng</a></li> <li><a href="/wiki/Fei-Fei_Li" title="Fei-Fei Li">Fei-Fei Li</a></li> <li><a href="/wiki/Alex_Krizhevsky" title="Alex Krizhevsky">Alex Krizhevsky</a></li> <li><a href="/wiki/Ilya_Sutskever" title="Ilya Sutskever">Ilya Sutskever</a></li> <li><a href="/wiki/Demis_Hassabis" title="Demis Hassabis">Demis Hassabis</a></li> <li><a href="/wiki/David_Silver_(computer_scientist)" title="David Silver (computer scientist)">David Silver</a></li> <li><a href="/wiki/Ian_Goodfellow" title="Ian Goodfellow">Ian Goodfellow</a></li> <li><a href="/wiki/Andrej_Karpathy" title="Andrej Karpathy">Andrej Karpathy</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Architectures</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Neural_Turing_machine" title="Neural Turing machine">Neural Turing machine</a></li> <li><a href="/wiki/Differentiable_neural_computer" title="Differentiable neural computer">Differentiable neural computer</a></li> <li><a href="/wiki/Transformer_(deep_learning_architecture)" title="Transformer (deep learning architecture)">Transformer</a> <ul><li><a href="/wiki/Vision_transformer" title="Vision transformer">Vision transformer (ViT)</a></li></ul></li> <li><a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">Recurrent neural network (RNN)</a></li> <li><a class="mw-selflink selflink">Long short-term memory (LSTM)</a></li> <li><a href="/wiki/Gated_recurrent_unit" title="Gated recurrent unit">Gated recurrent unit (GRU)</a></li> <li><a href="/wiki/Echo_state_network" title="Echo state network">Echo state network</a></li> <li><a 
href="/wiki/Multilayer_perceptron" title="Multilayer perceptron">Multilayer perceptron (MLP)</a></li> <li><a href="/wiki/Convolutional_neural_network" title="Convolutional neural network">Convolutional neural network (CNN)</a></li> <li><a href="/wiki/Residual_neural_network" title="Residual neural network">Residual neural network (ResNet)</a></li> <li><a href="/wiki/Highway_network" title="Highway network">Highway network</a></li> <li><a href="/wiki/Mamba_(deep_learning_architecture)" title="Mamba (deep learning architecture)">Mamba</a></li> <li><a href="/wiki/Autoencoder" title="Autoencoder">Autoencoder</a></li> <li><a href="/wiki/Variational_autoencoder" title="Variational autoencoder">Variational autoencoder (VAE)</a></li> <li><a href="/wiki/Generative_adversarial_network" title="Generative adversarial network">Generative adversarial network (GAN)</a></li> <li><a href="/wiki/Graph_neural_network" title="Graph neural network">Graph neural network (GNN)</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><a href="/wiki/File:Symbol_portal_class.svg" class="mw-file-description" title="Portal"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/16px-Symbol_portal_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/23px-Symbol_portal_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e2/Symbol_portal_class.svg/31px-Symbol_portal_class.svg.png 2x" data-file-width="180" data-file-height="185" /></a></span> Portals <ul><li><a href="/wiki/Portal:Technology" title="Portal:Technology">Technology</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" 
class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <a href="/wiki/Category:Artificial_intelligence" title="Category:Artificial intelligence">Category</a> <ul><li><a href="/wiki/Category:Artificial_neural_networks" title="Category:Artificial neural networks">Artificial neural networks</a></li> <li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a></li></ul></li> <li><span class="noviewer" typeof="mw:File"><span title="List-Class article"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/16px-Symbol_list_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/23px-Symbol_list_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/d/db/Symbol_list_class.svg/31px-Symbol_list_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> List <ul><li><a href="/wiki/List_of_artificial_intelligence_companies" title="List of artificial intelligence companies">Companies</a></li> <li><a href="/wiki/List_of_artificial_intelligence_projects" title="List of artificial intelligence projects">Projects</a></li></ul></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐5886496d‐pn6rs Cached time: 20250316130348 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 1.088 seconds Real time usage: 1.286 seconds Preprocessor visited node count: 6235/1000000 Post‐expand include size: 278765/2097152 bytes Template argument size: 3131/2097152 bytes Highest expansion depth: 12/100 Expensive parser function count: 5/500 
Unstrip recursion depth: 1/20 Unstrip post‐expand size: 358164/5000000 bytes Lua time usage: 0.698/10.000 seconds Lua memory usage: 7815290/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 1004.873 1 -total 63.96% 642.749 1 Template:Reflist 26.82% 269.469 33 Template:Cite_journal 9.68% 97.267 1 Template:Machine_learning 9.11% 91.529 1 Template:Sidebar_with_collapsible_lists 7.68% 77.205 12 Template:Cite_book 7.36% 73.922 1 Template:Short_description 5.75% 57.811 10 Template:Cite_arXiv 5.38% 54.041 1 Template:Cite_Q 4.76% 47.832 12 Template:Cite_web --> <!-- Saved in parser cache with key enwiki:pcache:10711453:|#|:idhash:canonical and timestamp 20250316130348 and revision id 1280106018. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?useformat=desktop&amp;type=1x1&amp;usesul3=0" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Long_short-term_memory&amp;oldid=1280106018">https://en.wikipedia.org/w/index.php?title=Long_short-term_memory&amp;oldid=1280106018</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Neural_network_architectures" title="Category:Neural network architectures">Neural network architectures</a></li><li><a href="/wiki/Category:Deep_learning" title="Category:Deep learning">Deep learning</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:CS1_maint:_multiple_names:_authors_list" 
title="Category:CS1 maint: multiple names: authors list">CS1 maint: multiple names: authors list</a></li><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_is_different_from_Wikidata" title="Category:Short description is different from Wikidata">Short description is different from Wikidata</a></li><li><a href="/wiki/Category:Wikipedia_articles_that_are_too_technical_from_March_2022" title="Category:Wikipedia articles that are too technical from March 2022">Wikipedia articles that are too technical from March 2022</a></li><li><a href="/wiki/Category:All_articles_that_are_too_technical" title="Category:All articles that are too technical">All articles that are too technical</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 12 March 2025, at 14:40<span class="anonymous-show">&#160;(UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Long_short-term_memory&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/static/images/footer/wikimedia-button.svg" width="84" height="29"><img src="/static/images/footer/wikimedia.svg" width="25" height="25" alt="Wikimedia Foundation" lang="en" loading="lazy"></picture></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button 
cdx-button--size-large cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/w/resources/assets/poweredby_mediawiki.svg" width="88" height="31"><img src="/w/resources/assets/mediawiki_compact.svg" alt="Powered by MediaWiki" lang="en" width="25" height="25" loading="lazy"></picture></a></li> </ul> </footer> </div> </div> </div> <div class="vector-header-container vector-sticky-header-container"> <div id="vector-sticky-header" class="vector-sticky-header"> <div class="vector-sticky-header-start"> <div class="vector-sticky-header-icon-start vector-button-flush-left vector-button-flush-right" aria-hidden="true"> <button class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-sticky-header-search-toggle" tabindex="-1" data-event-name="ui.vector-sticky-search-form.icon"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </button> </div> <div role="search" class="vector-search-box-vue vector-search-box-show-thumbnail vector-search-box"> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail"> <form action="/w/index.php" id="vector-sticky-search-form" class="cdx-search-input cdx-search-input--has-end-button"> <div class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia"> <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <div class="vector-sticky-header-context-bar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-sticky-header-toc" class="vector-dropdown mw-portlet mw-portlet-sticky-header-toc vector-sticky-header-toc 
vector-button-flush-left" > <input type="checkbox" id="vector-sticky-header-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-sticky-header-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-sticky-header-toc-label" for="vector-sticky-header-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-sticky-header-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div class="vector-sticky-header-context-bar-primary" aria-hidden="true" ><span class="mw-page-title-main">Long short-term memory</span></div> </div> </div> <div class="vector-sticky-header-end" aria-hidden="true"> <div class="vector-sticky-header-icons"> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-talk-sticky-header" tabindex="-1" data-event-name="talk-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbles mw-ui-icon-wikimedia-speechBubbles"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-subject-sticky-header" tabindex="-1" data-event-name="subject-sticky-header"><span class="vector-icon mw-ui-icon-article mw-ui-icon-wikimedia-article"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-history-sticky-header" tabindex="-1" data-event-name="history-sticky-header"><span class="vector-icon 
mw-ui-icon-wikimedia-history mw-ui-icon-wikimedia-wikimedia-history"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only mw-watchlink" id="ca-watchstar-sticky-header" tabindex="-1" data-event-name="watch-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-star mw-ui-icon-wikimedia-wikimedia-star"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-edit-sticky-header" tabindex="-1" data-event-name="wikitext-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-wikiText mw-ui-icon-wikimedia-wikimedia-wikiText"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-ve-edit-sticky-header" tabindex="-1" data-event-name="ve-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-edit mw-ui-icon-wikimedia-wikimedia-edit"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-viewsource-sticky-header" tabindex="-1" data-event-name="ve-edit-protected-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-editLock mw-ui-icon-wikimedia-wikimedia-editLock"></span> <span></span> </a> </div> <div class="vector-sticky-header-buttons"> <button class="cdx-button cdx-button--weight-quiet mw-interlanguage-selector" id="p-lang-btn-sticky-header" tabindex="-1" data-event-name="ui.dropdown-p-lang-btn-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-language mw-ui-icon-wikimedia-wikimedia-language"></span> <span>22 languages</span> </button> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive" 
id="ca-addsection-sticky-header" tabindex="-1" data-event-name="addsection-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbleAdd-progressive mw-ui-icon-wikimedia-speechBubbleAdd-progressive"></span> <span>Add topic</span> </a> </div> <div class="vector-sticky-header-icon-end"> <div class="vector-user-links"> </div> </div> </div> </div> </div> <div class="mw-portlet mw-portlet-dock-bottom emptyPortlet" id="p-dock-bottom"> <ul> </ul> </div> <script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-5886496d-qm4ph","wgBackendResponseTime":225,"wgPageParseReport":{"limitreport":{"cputime":"1.088","walltime":"1.286","ppvisitednodes":{"value":6235,"limit":1000000},"postexpandincludesize":{"value":278765,"limit":2097152},"templateargumentsize":{"value":3131,"limit":2097152},"expansiondepth":{"value":12,"limit":100},"expensivefunctioncount":{"value":5,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":358164,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 1004.873 1 -total"," 63.96% 642.749 1 Template:Reflist"," 26.82% 269.469 33 Template:Cite_journal"," 9.68% 97.267 1 Template:Machine_learning"," 9.11% 91.529 1 Template:Sidebar_with_collapsible_lists"," 7.68% 77.205 12 Template:Cite_book"," 7.36% 73.922 1 Template:Short_description"," 5.75% 57.811 10 Template:Cite_arXiv"," 5.38% 54.041 1 Template:Cite_Q"," 4.76% 47.832 12 Template:Cite_web"]},"scribunto":{"limitreport-timeusage":{"value":"0.698","limit":"10.000"},"limitreport-memusage":{"value":7815290,"limit":52428800},"limitreport-logs":"1 1 Sepp Hochreiter\n2 2 Jürgen Schmidhuber\n"},"cachereport":{"origin":"mw-web.codfw.main-5886496d-pn6rs","timestamp":"20250316130348","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Long short-term 
memory","url":"https:\/\/en.wikipedia.org\/wiki\/Long_short-term_memory","sameAs":"http:\/\/www.wikidata.org\/entity\/Q6673524","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q6673524","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2007-04-16T20:18:38Z","dateModified":"2025-03-12T14:40:36Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/9\/93\/LSTM_Cell.svg","headline":"artificial recurrent neural network architecture used in deep learning"}</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10