Reinforcement Learning Research Papers - Academia.edu
class="u-borderColorGrayLight u-borderBottom1"><a rel="false" href="https://www.academia.edu/documents">Papers</a></li><li class="u-borderColorGrayLight u-borderBottom1"><a rel="nofollow" href="https://www.academia.edu/terms">Terms</a></li><li class="u-borderColorGrayLight u-borderBottom1"><a rel="nofollow" href="https://www.academia.edu/privacy">Privacy</a></li><li class="u-borderColorGrayLight u-borderBottom1"><a rel="nofollow" href="https://www.academia.edu/copyright">Copyright</a></li><li class="u-borderColorGrayLight u-borderBottom1"><a rel="nofollow" href="https://www.academia.edu/hiring"><i class="fa fa-briefcase"></i> We're Hiring!</a></li><li class="u-borderColorGrayLight u-borderBottom1"><a rel="nofollow" href="https://support.academia.edu/hc/en-us"><i class="fa fa-question-circle"></i> Help Center</a></li><li class="js-mobile-nav-collapse-trigger u-borderColorGrayLight u-borderBottom1 dropup" style="display:none"><a href="#">less <span class="caret"></span></a></li></ul></li></ul></div></div></div><script>(function(){ var $moreLink = $(".js-mobile-nav-expand-trigger"); var $lessLink = $(".js-mobile-nav-collapse-trigger"); var $section = $('.js-mobile-nav-expand-section'); $moreLink.click(function(ev){ ev.preventDefault(); $moreLink.hide(); $lessLink.show(); $section.collapse('show'); }); $lessLink.click(function(ev){ ev.preventDefault(); $moreLink.show(); $lessLink.hide(); $section.collapse('hide'); }); })() if ($a.is_logged_in() || false) { new Aedu.NavigationController({ el: '.js-main-nav', showHighlightedNotification: false }); } else { $(".js-header-login-url").attr("href", $a.loginUrlWithRedirect()); } Aedu.autocompleteSearch = new AutocompleteSearch({el: '.js-SiteSearch-form'});</script></div></div> <div id='site' class='fixed'> <div id="content" class="clearfix"> <script>document.addEventListener('DOMContentLoaded', function(){ var $dismissible = $(".dismissible_banner"); $dismissible.click(function(ev) { $dismissible.hide(); }); });</script> <div class="DesignSystem" style="margin-top:-40px"><div class="PageHeader"><div class="container"><div class="row"><style type="text/css">.sor-abstract { display: -webkit-box; overflow: hidden; text-overflow: ellipsis; -webkit-line-clamp: 3; -webkit-box-orient: vertical; }</style><div class="col-xs-12 clearfix"><div class="u-floatLeft"><h1 class="PageHeader-title u-m0x u-fs30">Reinforcement Learning</h1><div class="u-tcGrayDark">29,258 Followers</div><div class="u-tcGrayDark u-mt2x">Recent papers in <b>Reinforcement Learning</b></div></div></div></div></div></div><div class="TabbedNavigation"><div class="container"><div class="row"><div class="col-xs-12 clearfix"><ul class="nav u-m0x u-p0x list-inline u-displayFlex"><li class="active"><a href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Top Papers</a></li><li><a href="https://www.academia.edu/Documents/in/Reinforcement_Learning/MostCited">Most Cited Papers</a></li><li><a href="https://www.academia.edu/Documents/in/Reinforcement_Learning/MostDownloaded">Most Downloaded Papers</a></li><li><a href="https://www.academia.edu/Documents/in/Reinforcement_Learning/MostRecent">Newest Papers</a></li><li><a class="" href="https://www.academia.edu/People/Reinforcement_Learning">People</a></li></ul></div><style type="text/css">ul.nav{flex-direction:row}@media(max-width: 567px){ul.nav{flex-direction:column}.TabbedNavigation li{max-width:100%}.TabbedNavigation li.active{background-color:var(--background-grey, #dddde2)}.TabbedNavigation li.active:before,.TabbedNavigation 
Autonomous Learning of Reward Distribution in Not100 Game
by Katsunari Shibata

In this paper, autonomous learning of reward distribution in multi-agent reinforcement learning was applied to the four-player game "not100". In this game, each agent needs shrewder tactics for cooperating with the other agents than in the tasks to which the learning had previously been applied. The reward distribution ratio after learning varied among simulation runs; however, the validity of the average non-uniform reward distribution ratio was examined in several ways. The three agents with the higher win probability after learning cooperated mutually, while strong cooperation was not observed in some cases when the agents learned with a fixed distribution ratio.

Topics: Reinforcement Learning, Autonomous learning
Hierarchical dynamic power management using model-free reinforcement learning
by Ahmed Ammari

Model-free reinforcement learning (RL) has become a promising technique for designing a robust dynamic power management (DPM) framework that can cope with variations and uncertainties that emanate from hardware and application characteristics. Moreover, the potentially significant benefit of performing application-level scheduling as part of the system-level power management should be harnessed. This paper presents an architecture for hierarchical DPM in an embedded system composed of a processor chip and connected I/O devices (which are called system components). The goal is to facilitate savings in the system-component power consumption, which tends to dominate the total power consumption. The proposed (online) adaptive DPM technique consists of two layers: an RL-based component-level local power manager (LPM) and a system-level global power manager (GPM). The LPM performs component power and latency optimization. It employs temporal-difference learning on a semi-Markov decision process (SMDP) for model-free RL, and it is specifically optimized for an environment in which multiple (heterogeneous) types of applications can run in the embedded system. The GPM interacts with the CPU scheduler to perform effective application-level scheduling, thereby enabling the LPM to do even more component power optimization. In this hierarchical DPM framework, the power and latency tradeoff of each type of application can be precisely controlled based on a user-defined parameter. Experiments show average power savings of up to 31.1% compared to existing approaches.

Topics: Reinforcement Learning, Embedded Systems, Robustness, Embedded System, Bayesian Classification
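The LPM's core ingredient named in the abstract, temporal-difference learning on a semi-Markov decision process, differs from ordinary Q-learning in that the discount depends on how long the system stays in a state before the next decision epoch. Below is a minimal sketch of that kind of SMDP update under stated assumptions; the power modes, the exponential discount, and the cost signal are illustrative choices, not the paper's implementation.

```python
import math
import random
from collections import defaultdict

# Illustrative SMDP Q-learning update for dynamic power management.
# States, actions, and the cost model are assumptions for the sketch.
ALPHA = 0.1          # learning rate
BETA = 0.5           # continuous-time discount rate: discount(tau) = exp(-BETA * tau)

Q = defaultdict(float)                  # Q[(state, action)]
ACTIONS = ("sleep", "idle", "active")   # hypothetical component power modes

def smdp_update(state, action, cost, sojourn_time, next_state):
    """One temporal-difference update where the discount depends on the time
    spent in `state` before the next decision epoch (SMDP setting)."""
    discount = math.exp(-BETA * sojourn_time)
    best_next = max(Q[(next_state, a)] for a in ACTIONS)
    # Reward is the negative of a weighted power/latency cost over the sojourn.
    td_target = -cost + discount * best_next
    Q[(state, action)] += ALPHA * (td_target - Q[(state, action)])

def choose_action(state, epsilon=0.1):
    """Epsilon-greedy choice over the hypothetical power modes."""
    if random.random() < epsilon:
        return random.choice(ACTIONS)
    return max(ACTIONS, key=lambda a: Q[(state, a)])
```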
A Framework for Anticipatory Machine Improvisation and Style Imitation
by Gerard Assayag

We present a first step towards anticipatory machine improvisation systems. The proposed system, based on fundamentals of music cognition, is a multi-agent, memory-based, collaborative and competitive reinforcement learning architecture capable of live interaction with a musician or a music score. Results demonstrate the ability to model long-term stylistic planning and a need for much less training data than reported in previous works.

Topics: Reinforcement Learning, Music Cognition
Reinforcement learning: Un estudio comparativo de la performance de sus principales métodos (Reinforcement learning: a comparative study of the performance of its main methods)
by Marcelo Luis Errecalde

Topics: Artificial Intelligence, Reinforcement Learning, Machine Learning, Collaborative Learning, Learning
Controle de Epidemias com Aprendizado por Reforço: Estratégia de Combate ao Aedes aegypti (Epidemic Control with Reinforcement Learning: A Strategy to Combat Aedes aegypti)
by André Ottoni

Topics: Reinforcement Learning, Machine Learning, Aedes aegypti, Inteligencia artificial (Artificial Intelligence), Traveling Salesman Problem, Aprendizado por Reforço (Reinforcement Learning), Problema do Caixeiro Viajante (Traveling Salesman Problem)

It's worse than you thought: The feedback negativity and violations of reward prediction in gambling tasks

Reinforcement learning theory suggests that the feedback negativity should be larger when feedback is unexpected. Two recent studies found, however, that the feedback negativity was unaffected by outcome probability. To further...
<a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_14409868" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">The reinforcement learning theory suggests that the feedback negativity should be larger when feedback is unexpected. Two recent studies found, however, that the feedback negativity was unaffected by outcome probability. To further examine this issue, participants in the present studies made reward predictions on each trial of a gambling task where objective reward probability was indicated by a cue. In Study 1, participants made reward predictions following the cue, but prior to their gambling choice; in Study 2, predictions were made following their gambling choice. Predicted and unpredicted outcomes were associated with equivalent feedback negativities in Study 1. In Study 2, however, the feedback negativity was larger for unpredicted outcomes. These data suggest that the magnitude of the feedback negativity is sensitive to violations of reward prediction, but that this effect may depend on the close coupling of prediction and outcome.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/14409868" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="40a95c7df7d3a72d8a01b31c2f89daa5" rel="nofollow" data-download="{"attachment_id":44169193,"asset_id":14409868,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/44169193/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="33352062" href="https://sbsuny.academia.edu/GregHajcak">Greg Hajcak</a><script data-card-contents-for-user="33352062" type="text/json">{"id":33352062,"first_name":"Greg","last_name":"Hajcak","domain_name":"sbsuny","page_name":"GregHajcak","display_name":"Greg Hajcak","profile_url":"https://sbsuny.academia.edu/GregHajcak?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span><span class="u-displayInlineBlock InlineList-item-text"> and <span class="u-textDecorationUnderline u-clickable InlineList-item-text js-work-more-authors-14409868">+1</span><div class="hidden js-additional-users-14409868"><div><span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a href="https://independent.academia.edu/RobertSimonsa">Robert Simonsa</a></span></div></div></span><script>(function(){ var popoverSettings = { el: $('.js-work-more-authors-14409868'), placement: 'bottom', hide_delay: 200, html: true, content: function(){ return 
$('.js-additional-users-14409868').html(); } } new HoverPopover(popoverSettings); })();</script></li><li class="js-paper-rank-work_14409868 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="14409868"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 14409868, container: ".js-paper-rank-work_14409868", }); });</script></li><li class="js-percentile-work_14409868 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 14409868; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_14409868"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_14409868 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="14409868"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 14409868; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=14409868]").text(description); $(".js-view-count-work_14409868").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_14409868").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="14409868"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">8</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="1026" rel="nofollow" href="https://www.academia.edu/Documents/in/Psychophysiology">Psychophysiology</a>, <script data-card-contents-for-ri="1026" type="text/json">{"id":1026,"name":"Psychophysiology","url":"https://www.academia.edu/Documents/in/Psychophysiology?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="10904" rel="nofollow" href="https://www.academia.edu/Documents/in/Electroencephalography">Electroencephalography</a>, <script data-card-contents-for-ri="10904" type="text/json">{"id":10904,"name":"Electroencephalography","url":"https://www.academia.edu/Documents/in/Electroencephalography?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="47884" rel="nofollow" href="https://www.academia.edu/Documents/in/Biological_Sciences">Biological Sciences</a><script data-card-contents-for-ri="47884" type="text/json">{"id":47884,"name":"Biological 
Sciences","url":"https://www.academia.edu/Documents/in/Biological_Sciences?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=14409868]'), work: {"id":14409868,"title":"It's worse than you thought: The feedback negativity and violations of reward prediction in gambling tasks","created_at":"2015-07-26T11:39:02.237-07:00","url":"https://www.academia.edu/14409868/Its_worse_than_you_thought_The_feedback_negativity_and_violations_of_reward_prediction_in_gambling_tasks?f_ri=1688","dom_id":"work_14409868","summary":"The reinforcement learning theory suggests that the feedback negativity should be larger when feedback is unexpected. Two recent studies found, however, that the feedback negativity was unaffected by outcome probability. To further examine this issue, participants in the present studies made reward predictions on each trial of a gambling task where objective reward probability was indicated by a cue. In Study 1, participants made reward predictions following the cue, but prior to their gambling choice; in Study 2, predictions were made following their gambling choice. Predicted and unpredicted outcomes were associated with equivalent feedback negativities in Study 1. In Study 2, however, the feedback negativity was larger for unpredicted outcomes. These data suggest that the magnitude of the feedback negativity is sensitive to violations of reward prediction, but that this effect may depend on the close coupling of prediction and outcome.","downloadable_attachments":[{"id":44169193,"asset_id":14409868,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":33352062,"first_name":"Greg","last_name":"Hajcak","domain_name":"sbsuny","page_name":"GregHajcak","display_name":"Greg Hajcak","profile_url":"https://sbsuny.academia.edu/GregHajcak?f_ri=1688","photo":"/images/s65_no_pic.png"},{"id":66416977,"first_name":"Robert","last_name":"Simonsa","domain_name":"independent","page_name":"RobertSimonsa","display_name":"Robert Simonsa","profile_url":"https://independent.academia.edu/RobertSimonsa?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":1026,"name":"Psychophysiology","url":"https://www.academia.edu/Documents/in/Psychophysiology?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":10904,"name":"Electroencephalography","url":"https://www.academia.edu/Documents/in/Electroencephalography?f_ri=1688","nofollow":true},{"id":47884,"name":"Biological Sciences","url":"https://www.academia.edu/Documents/in/Biological_Sciences?f_ri=1688","nofollow":true},{"id":49021,"name":"Reward","url":"https://www.academia.edu/Documents/in/Reward?f_ri=1688"},{"id":88325,"name":"Cues","url":"https://www.academia.edu/Documents/in/Cues?f_ri=1688"},{"id":92164,"name":"Gambling","url":"https://www.academia.edu/Documents/in/Gambling?f_ri=1688"},{"id":1318932,"name":"Predictive value of tests","url":"https://www.academia.edu/Documents/in/Predictive_value_of_tests?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_68331656" data-work_id="68331656" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" 
href="https://www.academia.edu/68331656/Dynamic_Scheduling_in_Petroleum_Process_using_Reinforcement_Learning">Dynamic Scheduling in Petroleum Process using Reinforcement Learning</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Petroleum industry production systems are highly automatized. In this industry, all functions (e.g., planning, scheduling and maintenance) are automated and in order to remain competitive researchers attempt to design an adaptive control... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_68331656" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Petroleum industry production systems are highly automatized. In this industry, all functions (e.g., planning, scheduling and maintenance) are automated and in order to remain competitive researchers attempt to design an adaptive control system which optimizes the process, but also able to adapt to rapidly evolving demands at a fixed cost. In this paper, we present a multi-agent approach for the dynamic task scheduling in petroleum industry production system. Agents simultaneously insure effective production scheduling and the continuous improvement of the solution quality by means of reinforcement learning, using the SARSA algorithm. Reinforcement learning allows the agents to adapt, learning the best behaviors for their various roles without reducing the performance or reactivity. To demonstrate the innovation of our approach, we include a computer simulation of our model and the results of experimentation applying our model to an Algerian petroleum refinery.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/68331656" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="c8d589ab6d2394f134357aef6fceea0f" rel="nofollow" data-download="{"attachment_id":78843511,"asset_id":68331656,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/78843511/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="112368719" href="https://univ-oran.academia.edu/nassimaaissani">nassima aissani</a><script data-card-contents-for-user="112368719" type="text/json">{"id":112368719,"first_name":"nassima","last_name":"aissani","domain_name":"univ-oran","page_name":"nassimaaissani","display_name":"nassima 
aissani","profile_url":"https://univ-oran.academia.edu/nassimaaissani?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_68331656 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="68331656"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 68331656, container: ".js-paper-rank-work_68331656", }); });</script></li><li class="js-percentile-work_68331656 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 68331656; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_68331656"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_68331656 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="68331656"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 68331656; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=68331656]").text(description); $(".js-view-count-work_68331656").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_68331656").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="68331656"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">12</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="8422" rel="nofollow" href="https://www.academia.edu/Documents/in/Adaptive_Control">Adaptive Control</a>, <script data-card-contents-for-ri="8422" type="text/json">{"id":8422,"name":"Adaptive Control","url":"https://www.academia.edu/Documents/in/Adaptive_Control?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="13602" rel="nofollow" href="https://www.academia.edu/Documents/in/Continuous_Improvement">Continuous Improvement</a>, <script data-card-contents-for-ri="13602" type="text/json">{"id":13602,"name":"Continuous Improvement","url":"https://www.academia.edu/Documents/in/Continuous_Improvement?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="45873" rel="nofollow" href="https://www.academia.edu/Documents/in/Multi_Agent_System">Multi Agent System</a><script data-card-contents-for-ri="45873" 
type="text/json">{"id":45873,"name":"Multi Agent System","url":"https://www.academia.edu/Documents/in/Multi_Agent_System?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=68331656]'), work: {"id":68331656,"title":"Dynamic Scheduling in Petroleum Process using Reinforcement Learning","created_at":"2022-01-15T12:28:02.241-08:00","url":"https://www.academia.edu/68331656/Dynamic_Scheduling_in_Petroleum_Process_using_Reinforcement_Learning?f_ri=1688","dom_id":"work_68331656","summary":"Petroleum industry production systems are highly automatized. In this industry, all functions (e.g., planning, scheduling and maintenance) are automated and in order to remain competitive researchers attempt to design an adaptive control system which optimizes the process, but also able to adapt to rapidly evolving demands at a fixed cost. In this paper, we present a multi-agent approach for the dynamic task scheduling in petroleum industry production system. Agents simultaneously insure effective production scheduling and the continuous improvement of the solution quality by means of reinforcement learning, using the SARSA algorithm. Reinforcement learning allows the agents to adapt, learning the best behaviors for their various roles without reducing the performance or reactivity. To demonstrate the innovation of our approach, we include a computer simulation of our model and the results of experimentation applying our model to an Algerian petroleum refinery.","downloadable_attachments":[{"id":78843511,"asset_id":68331656,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":112368719,"first_name":"nassima","last_name":"aissani","domain_name":"univ-oran","page_name":"nassimaaissani","display_name":"nassima aissani","profile_url":"https://univ-oran.academia.edu/nassimaaissani?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":8422,"name":"Adaptive Control","url":"https://www.academia.edu/Documents/in/Adaptive_Control?f_ri=1688","nofollow":true},{"id":13602,"name":"Continuous Improvement","url":"https://www.academia.edu/Documents/in/Continuous_Improvement?f_ri=1688","nofollow":true},{"id":45873,"name":"Multi Agent System","url":"https://www.academia.edu/Documents/in/Multi_Agent_System?f_ri=1688","nofollow":true},{"id":69542,"name":"Computer Simulation","url":"https://www.academia.edu/Documents/in/Computer_Simulation?f_ri=1688"},{"id":82760,"name":"Production Scheduling","url":"https://www.academia.edu/Documents/in/Production_Scheduling?f_ri=1688"},{"id":152868,"name":"Adaptive learning","url":"https://www.academia.edu/Documents/in/Adaptive_learning?f_ri=1688"},{"id":559035,"name":"Reactive Scheduling","url":"https://www.academia.edu/Documents/in/Reactive_Scheduling?f_ri=1688"},{"id":718456,"name":"Petroleum Industry","url":"https://www.academia.edu/Documents/in/Petroleum_Industry?f_ri=1688"},{"id":743999,"name":"Production System","url":"https://www.academia.edu/Documents/in/Production_System?f_ri=1688"},{"id":1179741,"name":"Task Scheduling","url":"https://www.academia.edu/Documents/in/Task_Scheduling?f_ri=1688"},{"id":1212482,"name":"Dynamic scheduling","url":"https://www.academia.edu/Documents/in/Dynamic_scheduling?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 
u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_25904624" data-work_id="25904624" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/25904624/Dopaminergic_and_prefrontal_contributions_to_reward_based_learning_and_outcome_monitoring_during_child_development_and_aging">Dopaminergic and prefrontal contributions to reward-based learning and outcome monitoring during child development and aging</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">In many instances, children and older adults show similar difficulties in reward-based learning and outcome monitoring. These impairments are most pronounced in situations in which reward is uncertain (e.g., probabilistic reward... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_25904624" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">In many instances, children and older adults show similar difficulties in reward-based learning and outcome monitoring. These impairments are most pronounced in situations in which reward is uncertain (e.g., probabilistic reward schedules) and if outcome information is ambiguous (e.g., the relative value of outcomes has to be learned). Furthermore, whereas children show a greater sensitivity to external outcome information, older adults focus less on a rapid differentiation of rewarding outcomes. In this article, we review evidence for the idea that these phenomenologically similar impairments in learning and outcome monitoring in children and older adults can be attributed to deficits in different underlying neurophysiological mechanisms. We propose that in older adults learning impairments are the result of reduced dopaminergic projections to the ventromedial prefrontal cortex, which lead to less differentiated representations of reward value. In contrast, in children, impairments in learning can be primarily attributed to deficits in executive control, which may be due to a protracted development of the dorsal medial and lateral prefrontal cortices. 
We think that this framework maps well onto recent neurophysiological models of reward processing and is plausible from a broader developmental perspective.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/25904624" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="0be7eb9435c221d79d9cb457c16cddef" rel="nofollow" data-download="{"attachment_id":46268677,"asset_id":25904624,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/46268677/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="49699370" href="https://independent.academia.edu/DorotheaHammerer">Dorothea Hammerer</a><script data-card-contents-for-user="49699370" type="text/json">{"id":49699370,"first_name":"Dorothea","last_name":"Hammerer","domain_name":"independent","page_name":"DorotheaHammerer","display_name":"Dorothea Hammerer","profile_url":"https://independent.academia.edu/DorotheaHammerer?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_25904624 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="25904624"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 25904624, container: ".js-paper-rank-work_25904624", }); });</script></li><li class="js-percentile-work_25904624 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 25904624; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_25904624"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_25904624 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="25904624"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 25904624; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); 
$(".js-view-count[data-work-id=25904624]").text(description); $(".js-view-count-work_25904624").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_25904624").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="25904624"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">19</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="221" rel="nofollow" href="https://www.academia.edu/Documents/in/Psychology">Psychology</a>, <script data-card-contents-for-ri="221" type="text/json">{"id":221,"name":"Psychology","url":"https://www.academia.edu/Documents/in/Psychology?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="237" rel="nofollow" href="https://www.academia.edu/Documents/in/Cognitive_Science">Cognitive Science</a>, <script data-card-contents-for-ri="237" type="text/json">{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="252" rel="nofollow" href="https://www.academia.edu/Documents/in/Developmental_Psychology">Developmental Psychology</a>, <script data-card-contents-for-ri="252" type="text/json">{"id":252,"name":"Developmental Psychology","url":"https://www.academia.edu/Documents/in/Developmental_Psychology?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1681" rel="nofollow" href="https://www.academia.edu/Documents/in/Decision_Making">Decision Making</a><script data-card-contents-for-ri="1681" type="text/json">{"id":1681,"name":"Decision Making","url":"https://www.academia.edu/Documents/in/Decision_Making?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=25904624]'), work: {"id":25904624,"title":"Dopaminergic and prefrontal contributions to reward-based learning and outcome monitoring during child development and aging","created_at":"2016-06-05T23:55:38.544-07:00","url":"https://www.academia.edu/25904624/Dopaminergic_and_prefrontal_contributions_to_reward_based_learning_and_outcome_monitoring_during_child_development_and_aging?f_ri=1688","dom_id":"work_25904624","summary":"In many instances, children and older adults show similar difficulties in reward-based learning and outcome monitoring. These impairments are most pronounced in situations in which reward is uncertain (e.g., probabilistic reward schedules) and if outcome information is ambiguous (e.g., the relative value of outcomes has to be learned). Furthermore, whereas children show a greater sensitivity to external outcome information, older adults focus less on a rapid differentiation of rewarding outcomes. In this article, we review evidence for the idea that these phenomenologically similar impairments in learning and outcome monitoring in children and older adults can be attributed to deficits in different underlying neurophysiological mechanisms. We propose that in older adults learning impairments are the result of reduced dopaminergic projections to the ventromedial prefrontal cortex, which lead to less differentiated representations of reward value. 
In contrast, in children, impairments in learning can be primarily attributed to deficits in executive control, which may be due to a protracted development of the dorsal medial and lateral prefrontal cortices. We think that this framework maps well onto recent neurophysiological models of reward processing and is plausible from a broader developmental perspective.","downloadable_attachments":[{"id":46268677,"asset_id":25904624,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":49699370,"first_name":"Dorothea","last_name":"Hammerer","domain_name":"independent","page_name":"DorotheaHammerer","display_name":"Dorothea Hammerer","profile_url":"https://independent.academia.edu/DorotheaHammerer?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":221,"name":"Psychology","url":"https://www.academia.edu/Documents/in/Psychology?f_ri=1688","nofollow":true},{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true},{"id":252,"name":"Developmental Psychology","url":"https://www.academia.edu/Documents/in/Developmental_Psychology?f_ri=1688","nofollow":true},{"id":1681,"name":"Decision Making","url":"https://www.academia.edu/Documents/in/Decision_Making?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688"},{"id":3770,"name":"Metabolism","url":"https://www.academia.edu/Documents/in/Metabolism?f_ri=1688"},{"id":4583,"name":"Child Development","url":"https://www.academia.edu/Documents/in/Child_Development?f_ri=1688"},{"id":6791,"name":"Aging","url":"https://www.academia.edu/Documents/in/Aging?f_ri=1688"},{"id":28576,"name":"Prefrontal Cortex","url":"https://www.academia.edu/Documents/in/Prefrontal_Cortex?f_ri=1688"},{"id":32362,"name":"Executive Control","url":"https://www.academia.edu/Documents/in/Executive_Control?f_ri=1688"},{"id":43774,"name":"Learning","url":"https://www.academia.edu/Documents/in/Learning?f_ri=1688"},{"id":49021,"name":"Reward","url":"https://www.academia.edu/Documents/in/Reward?f_ri=1688"},{"id":51566,"name":"Dopamine","url":"https://www.academia.edu/Documents/in/Dopamine?f_ri=1688"},{"id":64933,"name":"Child","url":"https://www.academia.edu/Documents/in/Child?f_ri=1688"},{"id":208996,"name":"Cognitive processes","url":"https://www.academia.edu/Documents/in/Cognitive_processes?f_ri=1688"},{"id":428130,"name":"Cognitive Ability","url":"https://www.academia.edu/Documents/in/Cognitive_Ability?f_ri=1688"},{"id":806178,"name":"Older Adult","url":"https://www.academia.edu/Documents/in/Older_Adult?f_ri=1688"},{"id":1127828,"name":"Life Span","url":"https://www.academia.edu/Documents/in/Life_Span?f_ri=1688"},{"id":1565778,"name":"Ventromedial Prefrontal Cortex","url":"https://www.academia.edu/Documents/in/Ventromedial_Prefrontal_Cortex?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_69744631" data-work_id="69744631" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/69744631/The_Origin_of_the_Speeches_language_evolution_through_collaborative_reinforcement_learning">The Origin of the Speeches: language evolution through collaborative reinforcement learning</a></div></div><div class="u-pb4x u-mt3x"><div 
class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">This project proposes that language evolve through reinforcement learning where agents communicate with each other and provide rewards if communication is successful. The fundamental difference between the learning mechanisms that humans... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_69744631" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">This project proposes that language evolve through reinforcement learning where agents communicate with each other and provide rewards if communication is successful. The fundamental difference between the learning mechanisms that humans use to communicate with one another and how machines learn to communicate is that the system used by humans presupposes that the adult already knows the meanings associated with the human language. Languages evolve historically to be optimal communication systems where human language learning mechanisms have evolved in order to learn these systems more efficiently. Machines in their learning of natural language, have to start at a place that humans mastered thousands of years ago. Uttering previously unheard signals and collectively establishing meaning. The question that this paper deals with is how can a communication system evolve if none of the conspirators have mastered the system previously using evolutionary computation and reinforcement lear...</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/69744631" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="877e586fb75834ef03eb43868c6e2953" rel="nofollow" data-download="{"attachment_id":79722536,"asset_id":69744631,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/79722536/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="49271887" href="https://independent.academia.edu/WalsheRay">Ray Walshe</a><script data-card-contents-for-user="49271887" type="text/json">{"id":49271887,"first_name":"Ray","last_name":"Walshe","domain_name":"independent","page_name":"WalsheRay","display_name":"Ray Walshe","profile_url":"https://independent.academia.edu/WalsheRay?f_ri=1688","photo":"https://0.academia-photos.com/49271887/14265322/52158106/s65_ray.walshe.png"}</script></span></span></li><li class="js-paper-rank-work_69744631 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" 
data-paper-rank-work-id="69744631"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 69744631, container: ".js-paper-rank-work_69744631", }); });</script></li><li class="js-percentile-work_69744631 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 69744631; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_69744631"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_69744631 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="69744631"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 69744631; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=69744631]").text(description); $(".js-view-count-work_69744631").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_69744631").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="69744631"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">11</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="422" rel="nofollow" href="https://www.academia.edu/Documents/in/Computer_Science">Computer Science</a>, <script data-card-contents-for-ri="422" type="text/json">{"id":422,"name":"Computer Science","url":"https://www.academia.edu/Documents/in/Computer_Science?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2008" rel="nofollow" href="https://www.academia.edu/Documents/in/Machine_Learning">Machine Learning</a>, <script data-card-contents-for-ri="2008" type="text/json">{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="6642" rel="nofollow" href="https://www.academia.edu/Documents/in/Language_Evolution">Language Evolution</a><script data-card-contents-for-ri="6642" type="text/json">{"id":6642,"name":"Language Evolution","url":"https://www.academia.edu/Documents/in/Language_Evolution?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=69744631]'), work: {"id":69744631,"title":"The Origin of the Speeches: 
language evolution through collaborative reinforcement learning","created_at":"2022-01-28T01:04:41.419-08:00","url":"https://www.academia.edu/69744631/The_Origin_of_the_Speeches_language_evolution_through_collaborative_reinforcement_learning?f_ri=1688","dom_id":"work_69744631","summary":"This project proposes that language evolve through reinforcement learning where agents communicate with each other and provide rewards if communication is successful. The fundamental difference between the learning mechanisms that humans use to communicate with one another and how machines learn to communicate is that the system used by humans presupposes that the adult already knows the meanings associated with the human language. Languages evolve historically to be optimal communication systems where human language learning mechanisms have evolved in order to learn these systems more efficiently. Machines in their learning of natural language, have to start at a place that humans mastered thousands of years ago. Uttering previously unheard signals and collectively establishing meaning. The question that this paper deals with is how can a communication system evolve if none of the conspirators have mastered the system previously using evolutionary computation and reinforcement lear...","downloadable_attachments":[{"id":79722536,"asset_id":69744631,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":49271887,"first_name":"Ray","last_name":"Walshe","domain_name":"independent","page_name":"WalsheRay","display_name":"Ray Walshe","profile_url":"https://independent.academia.edu/WalsheRay?f_ri=1688","photo":"https://0.academia-photos.com/49271887/14265322/52158106/s65_ray.walshe.png"}],"research_interests":[{"id":422,"name":"Computer Science","url":"https://www.academia.edu/Documents/in/Computer_Science?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true},{"id":6642,"name":"Language Evolution","url":"https://www.academia.edu/Documents/in/Language_Evolution?f_ri=1688","nofollow":true},{"id":30524,"name":"Intelligent Virtual Agents","url":"https://www.academia.edu/Documents/in/Intelligent_Virtual_Agents?f_ri=1688"},{"id":42555,"name":"Communication System","url":"https://www.academia.edu/Documents/in/Communication_System?f_ri=1688"},{"id":50569,"name":"Language Learning","url":"https://www.academia.edu/Documents/in/Language_Learning?f_ri=1688"},{"id":97618,"name":"Natural language","url":"https://www.academia.edu/Documents/in/Natural_language?f_ri=1688"},{"id":232085,"name":"IS success","url":"https://www.academia.edu/Documents/in/IS_success?f_ri=1688"},{"id":252813,"name":"Evolutionary Computing","url":"https://www.academia.edu/Documents/in/Evolutionary_Computing?f_ri=1688"},{"id":1804246,"name":"Agent Communication","url":"https://www.academia.edu/Documents/in/Agent_Communication?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_42970676" data-work_id="42970676" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" 
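As a rough illustration of the reward-if-communication-succeeds setup sketched in that abstract, the toy script below has a speaker and a listener converge on a signalling convention by reinforcing whichever choices produced a shared reward; the meaning and signal sets, the epsilon-greedy rule and the learning rate are assumptions made for the example, not the paper's model.

```python
import random
from collections import defaultdict

MEANINGS = ["food", "danger", "shelter"]  # toy meaning set (assumption)
SIGNALS = ["a", "b", "c"]                 # toy signal set (assumption)

def pick(q_row, options, epsilon=0.1):
    # Epsilon-greedy choice over a row of action values.
    if random.random() < epsilon:
        return random.choice(options)
    return max(options, key=lambda o: q_row[o])

def train(rounds=20000, alpha=0.1):
    speaker_q = defaultdict(lambda: defaultdict(float))   # meaning -> value of each signal
    listener_q = defaultdict(lambda: defaultdict(float))  # signal  -> value of each meaning
    for _ in range(rounds):
        meaning = random.choice(MEANINGS)
        signal = pick(speaker_q[meaning], SIGNALS)
        guess = pick(listener_q[signal], MEANINGS)
        reward = 1.0 if guess == meaning else 0.0  # reward only when communication succeeds
        # Both agents reinforce the choice they just made toward the shared reward.
        speaker_q[meaning][signal] += alpha * (reward - speaker_q[meaning][signal])
        listener_q[signal][guess] += alpha * (reward - listener_q[signal][guess])
    return speaker_q, listener_q

speaker_q, _ = train()
print({m: max(SIGNALS, key=lambda s: speaker_q[m][s]) for m in MEANINGS})
```
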
href="https://www.academia.edu/42970676/Shortest_Route_Analysis_for_Road_Accident_Emergency_using_Dijkstra_Algorithm_and_Fuzzy_Logic">Shortest Route Analysis for Road Accident Emergency using Dijkstra Algorithm and Fuzzy Logic</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Victims of road traffic accidents face severe health problems on-site or after the event when they arrive at hospital lately in their emergency cycle. Road traffic accident has negative effect on the physical, social and emotional... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_42970676" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Victims of road traffic accidents face severe health problems on-site or after the event when they arrive at hospital lately in their emergency cycle. Road traffic accident has negative effect on the physical, social and emotional security of human lives which often lead to mortality, illness, pain, grief and even disability. This paper proposes a scheme that reduces the severity of road traffic accidents given its inevitable occurrence. The rational is to search for nearest hospitals to the accident location using Dijkstra algorithm and Fuzzy logic to recommend suitable hospitals out of list of nearest hospitals to timely attend to the emergency situation considering factors such as distance, severity of the accident, available facilities in the hospitals and other factors. The obtained results showed the practicability of the system to recommendation of quick solution to accident emergencies.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/42970676" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="f0eb0c3b460ee36c55b02c7afa742954" rel="nofollow" data-download="{"attachment_id":63228656,"asset_id":42970676,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/63228656/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="157171415" href="https://independent.academia.edu/TaiwoGabrielOmomule">Taiwo Gabriel Omomule</a><script data-card-contents-for-user="157171415" type="text/json">{"id":157171415,"first_name":"Taiwo Gabriel","last_name":"Omomule","domain_name":"independent","page_name":"TaiwoGabrielOmomule","display_name":"Taiwo Gabriel 
Omomule","profile_url":"https://independent.academia.edu/TaiwoGabrielOmomule?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_42970676 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="42970676"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 42970676, container: ".js-paper-rank-work_42970676", }); });</script></li><li class="js-percentile-work_42970676 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 42970676; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_42970676"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_42970676 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="42970676"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 42970676; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=42970676]").text(description); $(".js-view-count-work_42970676").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_42970676").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="42970676"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">12</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="465" rel="nofollow" href="https://www.academia.edu/Documents/in/Artificial_Intelligence">Artificial Intelligence</a>, <script data-card-contents-for-ri="465" type="text/json">{"id":465,"name":"Artificial Intelligence","url":"https://www.academia.edu/Documents/in/Artificial_Intelligence?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2008" rel="nofollow" href="https://www.academia.edu/Documents/in/Machine_Learning">Machine Learning</a>, <script data-card-contents-for-ri="2008" type="text/json">{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2009" rel="nofollow" href="https://www.academia.edu/Documents/in/Data_Mining">Data Mining</a><script data-card-contents-for-ri="2009" 
type="text/json">{"id":2009,"name":"Data Mining","url":"https://www.academia.edu/Documents/in/Data_Mining?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=42970676]'), work: {"id":42970676,"title":"Shortest Route Analysis for Road Accident Emergency using Dijkstra Algorithm and Fuzzy Logic","created_at":"2020-05-07T06:49:29.527-07:00","url":"https://www.academia.edu/42970676/Shortest_Route_Analysis_for_Road_Accident_Emergency_using_Dijkstra_Algorithm_and_Fuzzy_Logic?f_ri=1688","dom_id":"work_42970676","summary":"Victims of road traffic accidents face severe health problems on-site or after the event when they arrive at hospital lately in their emergency cycle. Road traffic accident has negative effect on the physical, social and emotional security of human lives which often lead to mortality, illness, pain, grief and even disability. This paper proposes a scheme that reduces the severity of road traffic accidents given its inevitable occurrence. The rational is to search for nearest hospitals to the accident location using Dijkstra algorithm and Fuzzy logic to recommend suitable hospitals out of list of nearest hospitals to timely attend to the emergency situation considering factors such as distance, severity of the accident, available facilities in the hospitals and other factors. The obtained results showed the practicability of the system to recommendation of quick solution to accident emergencies.","downloadable_attachments":[{"id":63228656,"asset_id":42970676,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":157171415,"first_name":"Taiwo Gabriel","last_name":"Omomule","domain_name":"independent","page_name":"TaiwoGabrielOmomule","display_name":"Taiwo Gabriel Omomule","profile_url":"https://independent.academia.edu/TaiwoGabrielOmomule?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":465,"name":"Artificial Intelligence","url":"https://www.academia.edu/Documents/in/Artificial_Intelligence?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true},{"id":2009,"name":"Data Mining","url":"https://www.academia.edu/Documents/in/Data_Mining?f_ri=1688","nofollow":true},{"id":4803,"name":"Active Learning","url":"https://www.academia.edu/Documents/in/Active_Learning?f_ri=1688"},{"id":12512,"name":"Data Quality (Computer Science)","url":"https://www.academia.edu/Documents/in/Data_Quality_Computer_Science_?f_ri=1688"},{"id":15084,"name":"Statistical machine learning","url":"https://www.academia.edu/Documents/in/Statistical_machine_learning?f_ri=1688"},{"id":49339,"name":"Data Streams","url":"https://www.academia.edu/Documents/in/Data_Streams?f_ri=1688"},{"id":69100,"name":"Data Science","url":"https://www.academia.edu/Documents/in/Data_Science?f_ri=1688"},{"id":81182,"name":"Deep Learning","url":"https://www.academia.edu/Documents/in/Deep_Learning?f_ri=1688"},{"id":126300,"name":"Big Data","url":"https://www.academia.edu/Documents/in/Big_Data?f_ri=1688"},{"id":413148,"name":"Big Data / Analytics / Data Mining","url":"https://www.academia.edu/Documents/in/Big_Data_Analytics_Data_Mining?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div 
class="clearfix u-pv7x u-mb0x js-work-card work_40969161" data-work_id="40969161" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/40969161/Curiosity_Driven_Reinforcement_Learning_for_Dialogue_Management">Curiosity-Driven Reinforcement Learning for Dialogue Management</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Obtaining an effective reward signal for dialogue management is a non trivial problem. Real user feedback is inconsistent and often even absent. This thesis investigates the use of intrinsic rewards for a reinforcement learning based... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_40969161" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Obtaining an effective reward signal for dialogue management is a non trivial problem. Real user feedback is inconsistent and often even absent. This thesis investigates the use of intrinsic rewards for a reinforcement learning based dialogue manager in order to improve policy learning in an environment with sparse rewards and to move away from inefficient random ε-greedy exploration. In addition to rewards given by a user simulator for successful dialogues, intrinsic curiosity rewards are given in the form of belief-state prediction errors generated by an intrinsic curiosity module within the dialogue manager. We investigate two main settings for this method: (1) predicting the raw next belief-state, and (2) predicting belief-states in a learned feature space. In order to meet the right difficulty level for the system to be able to learn a feature space, the model is pre-trained on a small pool of dialogue transitions. For both settings, results comparable to and better than simple ε-greedy exploration are achieved. 
(1) is able to learn faster, but (2) achieves higher final results and has more potential for improvements and to be successful in larger state-action spaces, where feature encodings and generalization are beneficial.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/40969161" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="e0c4c7887f6f55a7b43f91be9ebea217" rel="nofollow" data-download="{"attachment_id":61251965,"asset_id":40969161,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/61251965/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="128533122" href="https://independent.academia.edu/NicolasParisi1">Michael Chinkers</a><script data-card-contents-for-user="128533122" type="text/json">{"id":128533122,"first_name":"Michael","last_name":"Chinkers","domain_name":"independent","page_name":"NicolasParisi1","display_name":"Michael Chinkers","profile_url":"https://independent.academia.edu/NicolasParisi1?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_40969161 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="40969161"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 40969161, container: ".js-paper-rank-work_40969161", }); });</script></li><li class="js-percentile-work_40969161 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 40969161; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_40969161"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_40969161 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="40969161"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 40969161; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); 
$(".js-view-count[data-work-id=40969161]").text(description); $(".js-view-count-work_40969161").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_40969161").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="40969161"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">5</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="77" rel="nofollow" href="https://www.academia.edu/Documents/in/Robotics">Robotics</a>, <script data-card-contents-for-ri="77" type="text/json">{"id":77,"name":"Robotics","url":"https://www.academia.edu/Documents/in/Robotics?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="465" rel="nofollow" href="https://www.academia.edu/Documents/in/Artificial_Intelligence">Artificial Intelligence</a>, <script data-card-contents-for-ri="465" type="text/json">{"id":465,"name":"Artificial Intelligence","url":"https://www.academia.edu/Documents/in/Artificial_Intelligence?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="5175" rel="nofollow" href="https://www.academia.edu/Documents/in/Dialogue">Dialogue</a><script data-card-contents-for-ri="5175" type="text/json">{"id":5175,"name":"Dialogue","url":"https://www.academia.edu/Documents/in/Dialogue?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=40969161]'), work: {"id":40969161,"title":"Curiosity-Driven Reinforcement Learning for Dialogue Management","created_at":"2019-11-18T03:28:15.449-08:00","url":"https://www.academia.edu/40969161/Curiosity_Driven_Reinforcement_Learning_for_Dialogue_Management?f_ri=1688","dom_id":"work_40969161","summary":"Obtaining an effective reward signal for dialogue management is a non trivial problem. Real user feedback is inconsistent and often even absent. This thesis investigates the use of intrinsic rewards for a reinforcement learning based dialogue manager in order to improve policy learning in an environment with sparse rewards and to move away from inefficient random ε-greedy exploration. In addition to rewards given by a user simulator for successful dialogues, intrinsic curiosity rewards are given in the form of belief-state prediction errors generated by an intrinsic curiosity module within the dialogue manager. We investigate two main settings for this method: (1) predicting the raw next belief-state, and (2) predicting belief-states in a learned feature space. In order to meet the right difficulty level for the system to be able to learn a feature space, the model is pre-trained on a small pool of dialogue transitions. For both settings, results comparable to and better than simple ε-greedy exploration are achieved. 
Reinforcement Learning with Raw Image Pixels as Input State
https://www.academia.edu/32011434/Reinforcement_Learning_with_Raw_Image_Pixels_as_Input_State

Abstract: We report in this paper some positive simulation results obtained when image pixels are directly used as input state of a reinforcement learning algorithm. The reinforcement learning algorithm chosen to carry out the simulation is a batch-mode algorithm known as fitted Q iteration.

by Raphaël Marée (https://independent.academia.edu/Rapha%C3%ABlMar%C3%A9e)
Research interests: Artificial Intelligence, Computer Vision, Image Processing, Reinforcement Learning, Batch Process, Pattern analysis
On the Numeric Stability of Gaussian Processes Regression for Relational Reinforcement Learning
https://www.academia.edu/451810/On_the_Numeric_Stability_of_Gaussian_Processes_Regression_for_Relational_Reinforcement_Learning

by Jan Ramon (https://kuleuven.academia.edu/JanRamon)
Research interests: Reinforcement Learning, Matrix Inversion, Relational Reinforcement Learning, Numerical Stability, Gaussian Process, Gaussian Process Regression, QR Factorization
href="https://www.academia.edu/Documents/in/Numerical_Stability">Numerical Stability</a><script data-card-contents-for-ri="606723" type="text/json">{"id":606723,"name":"Numerical Stability","url":"https://www.academia.edu/Documents/in/Numerical_Stability?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=451810]'), work: {"id":451810,"title":"On the Numeric Stability of Gaussian Processes Regression for Relational Reinforcement Learning","created_at":"2011-02-21T20:23:00.929-08:00","url":"https://www.academia.edu/451810/On_the_Numeric_Stability_of_Gaussian_Processes_Regression_for_Relational_Reinforcement_Learning?f_ri=1688","dom_id":"work_451810","summary":null,"downloadable_attachments":[{"id":2014823,"asset_id":451810,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":339501,"first_name":"Jan","last_name":"Ramon","domain_name":"kuleuven","page_name":"JanRamon","display_name":"Jan Ramon","profile_url":"https://kuleuven.academia.edu/JanRamon?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":72253,"name":"Matrix Inversion","url":"https://www.academia.edu/Documents/in/Matrix_Inversion?f_ri=1688","nofollow":true},{"id":584555,"name":"Relational Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Relational_Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":606723,"name":"Numerical Stability","url":"https://www.academia.edu/Documents/in/Numerical_Stability?f_ri=1688","nofollow":true},{"id":688446,"name":"Gaussian Process","url":"https://www.academia.edu/Documents/in/Gaussian_Process?f_ri=1688"},{"id":958135,"name":"Gaussian Process Regression","url":"https://www.academia.edu/Documents/in/Gaussian_Process_Regression?f_ri=1688"},{"id":1006158,"name":"QR Factorization","url":"https://www.academia.edu/Documents/in/QR_Factorization?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_507907" data-work_id="507907" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/507907/Pessimistic_cost_sensitive_active_learning_of_decision_trees_for_profit_maximizing_targeting_campaigns">Pessimistic cost-sensitive active learning of decision trees for profit maximizing targeting campaigns</a></div></div><div class="u-pb4x u-mt3x"></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/507907" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="1e61c182a1d346dbdbc6481e548b080d" rel="nofollow" data-download="{"attachment_id":2404860,"asset_id":507907,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button 
doc_download" href="https://www.academia.edu/attachments/2404860/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="386829" href="https://bgu.academia.edu/ArminShmilovici">Armin Shmilovici</a><script data-card-contents-for-user="386829" type="text/json">{"id":386829,"first_name":"Armin","last_name":"Shmilovici","domain_name":"bgu","page_name":"ArminShmilovici","display_name":"Armin Shmilovici","profile_url":"https://bgu.academia.edu/ArminShmilovici?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_507907 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="507907"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 507907, container: ".js-paper-rank-work_507907", }); });</script></li><li class="js-percentile-work_507907 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 507907; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_507907"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_507907 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="507907"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 507907; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=507907]").text(description); $(".js-view-count-work_507907").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_507907").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="507907"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">15</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="37" rel="nofollow" href="https://www.academia.edu/Documents/in/Information_Systems">Information Systems</a>, <script data-card-contents-for-ri="37" type="text/json">{"id":37,"name":"Information Systems","url":"https://www.academia.edu/Documents/in/Information_Systems?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement 
Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="4803" rel="nofollow" href="https://www.academia.edu/Documents/in/Active_Learning">Active Learning</a>, <script data-card-contents-for-ri="4803" type="text/json">{"id":4803,"name":"Active Learning","url":"https://www.academia.edu/Documents/in/Active_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="9507" rel="nofollow" href="https://www.academia.edu/Documents/in/Design_of_Experiments">Design of Experiments</a><script data-card-contents-for-ri="9507" type="text/json">{"id":9507,"name":"Design of Experiments","url":"https://www.academia.edu/Documents/in/Design_of_Experiments?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=507907]'), work: {"id":507907,"title":"Pessimistic cost-sensitive active learning of decision trees for profit maximizing targeting campaigns","created_at":"2011-04-03T06:30:12.875-07:00","url":"https://www.academia.edu/507907/Pessimistic_cost_sensitive_active_learning_of_decision_trees_for_profit_maximizing_targeting_campaigns?f_ri=1688","dom_id":"work_507907","summary":null,"downloadable_attachments":[{"id":2404860,"asset_id":507907,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":386829,"first_name":"Armin","last_name":"Shmilovici","domain_name":"bgu","page_name":"ArminShmilovici","display_name":"Armin Shmilovici","profile_url":"https://bgu.academia.edu/ArminShmilovici?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":37,"name":"Information Systems","url":"https://www.academia.edu/Documents/in/Information_Systems?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":4803,"name":"Active Learning","url":"https://www.academia.edu/Documents/in/Active_Learning?f_ri=1688","nofollow":true},{"id":9507,"name":"Design of Experiments","url":"https://www.academia.edu/Documents/in/Design_of_Experiments?f_ri=1688","nofollow":true},{"id":12814,"name":"Direct Marketing","url":"https://www.academia.edu/Documents/in/Direct_Marketing?f_ri=1688"},{"id":53338,"name":"Decision Trees","url":"https://www.academia.edu/Documents/in/Decision_Trees?f_ri=1688"},{"id":162271,"name":"Decision Tree","url":"https://www.academia.edu/Documents/in/Decision_Tree?f_ri=1688"},{"id":209305,"name":"Design of experiment","url":"https://www.academia.edu/Documents/in/Design_of_experiment?f_ri=1688"},{"id":254085,"name":"Data Mining and Knowledge Discovery","url":"https://www.academia.edu/Documents/in/Data_Mining_and_Knowledge_Discovery?f_ri=1688"},{"id":289812,"name":"Orthogonal Array","url":"https://www.academia.edu/Documents/in/Orthogonal_Array?f_ri=1688"},{"id":379262,"name":"Cost sensitive learning","url":"https://www.academia.edu/Documents/in/Cost_sensitive_learning?f_ri=1688"},{"id":688865,"name":"Datamine","url":"https://www.academia.edu/Documents/in/Datamine?f_ri=1688"},{"id":821516,"name":"Decision Maker","url":"https://www.academia.edu/Documents/in/Decision_Maker?f_ri=1688"},{"id":1138319,"name":"Learning Methods","url":"https://www.academia.edu/Documents/in/Learning_Methods?f_ri=1688"},{"id":2070034,"name":"Data 
Format","url":"https://www.academia.edu/Documents/in/Data_Format?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_15748138" data-work_id="15748138" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/15748138/A_SIMPLE_ACTOR_CRITIC_ALGORITHM_FOR_CONTINUOUS_ENVIRONMENTS">A SIMPLE ACTOR-CRITIC ALGORITHM FOR CONTINUOUS ENVIRONMENTS</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">In reference to methods analyzed recently by Sutton et al, and Konda & Tsitsiklis, we propose their modification called Randomized Policy Optimizer (RPO). The algorithm has a modular structure and is based on the value function rather... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_15748138" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">In reference to methods analyzed recently by Sutton et al, and Konda & Tsitsiklis, we propose their modification called Randomized Policy Optimizer (RPO). The algorithm has a modular structure and is based on the value function rather than on the action-value function. The modules include neural approximators and a parameterized distribution of con- trol actions. The distribution must belong to</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/15748138" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="44af09e7bd5964d0344b591c6e9a3eb8" rel="nofollow" data-download="{"attachment_id":42909909,"asset_id":15748138,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/42909909/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="34940484" href="https://pw.academia.edu/AndrzejPacut">Andrzej Pacut</a><script data-card-contents-for-user="34940484" type="text/json">{"id":34940484,"first_name":"Andrzej","last_name":"Pacut","domain_name":"pw","page_name":"AndrzejPacut","display_name":"Andrzej Pacut","profile_url":"https://pw.academia.edu/AndrzejPacut?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_15748138 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" 
data-paper-rank-work-id="15748138"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 15748138, container: ".js-paper-rank-work_15748138", }); });</script></li><li class="js-percentile-work_15748138 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 15748138; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_15748138"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_15748138 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="15748138"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 15748138; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=15748138]").text(description); $(".js-view-count-work_15748138").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_15748138").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="15748138"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">2</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="745006" rel="nofollow" href="https://www.academia.edu/Documents/in/Adaptive_Heuristic_Critic">Adaptive Heuristic Critic</a><script data-card-contents-for-ri="745006" type="text/json">{"id":745006,"name":"Adaptive Heuristic Critic","url":"https://www.academia.edu/Documents/in/Adaptive_Heuristic_Critic?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=15748138]'), work: {"id":15748138,"title":"A SIMPLE ACTOR-CRITIC ALGORITHM FOR CONTINUOUS ENVIRONMENTS","created_at":"2015-09-15T23:00:27.480-07:00","url":"https://www.academia.edu/15748138/A_SIMPLE_ACTOR_CRITIC_ALGORITHM_FOR_CONTINUOUS_ENVIRONMENTS?f_ri=1688","dom_id":"work_15748138","summary":"In reference to methods analyzed recently by Sutton et al, and Konda \u0026 Tsitsiklis, we propose their modification called Randomized Policy Optimizer (RPO). The algorithm has a modular structure and is based on the value function rather than on the action-value function. The modules include neural approximators and a parameterized distribution of con- trol actions. 
The distribution must belong to","downloadable_attachments":[{"id":42909909,"asset_id":15748138,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":34940484,"first_name":"Andrzej","last_name":"Pacut","domain_name":"pw","page_name":"AndrzejPacut","display_name":"Andrzej Pacut","profile_url":"https://pw.academia.edu/AndrzejPacut?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":745006,"name":"Adaptive Heuristic Critic","url":"https://www.academia.edu/Documents/in/Adaptive_Heuristic_Critic?f_ri=1688","nofollow":true}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_16921591" data-work_id="16921591" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/16921591/Dynamics_of_Network_Selection_in_Heterogeneous_Wireless_Networks_An_Evolutionary_Game_Approach">Dynamics of Network Selection in Heterogeneous Wireless Networks: An Evolutionary Game Approach</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Abstract Next-generation wireless networks will integrate multiple wireless access technologies to provide seamless mobility to mobile users with high-speed wireless connectivity. This will give rise to a heterogeneous wireless access... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_16921591" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Abstract Next-generation wireless networks will integrate multiple wireless access technologies to provide seamless mobility to mobile users with high-speed wireless connectivity. 
This will give rise to a heterogeneous wireless access environment where ...</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/16921591" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="8cb3bc273ea5268b89e1ebc817a24585" rel="nofollow" data-download="{"attachment_id":42374203,"asset_id":16921591,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/42374203/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="36416056" href="https://umanitoba.academia.edu/EkramHossain">Ekram Hossain</a><script data-card-contents-for-user="36416056" type="text/json">{"id":36416056,"first_name":"Ekram","last_name":"Hossain","domain_name":"umanitoba","page_name":"EkramHossain","display_name":"Ekram Hossain","profile_url":"https://umanitoba.academia.edu/EkramHossain?f_ri=1688","photo":"https://0.academia-photos.com/36416056/10467419/11679665/s65_ekram.hossain.jpg"}</script></span></span></li><li class="js-paper-rank-work_16921591 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="16921591"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 16921591, container: ".js-paper-rank-work_16921591", }); });</script></li><li class="js-percentile-work_16921591 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 16921591; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_16921591"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_16921591 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="16921591"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 16921591; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=16921591]").text(description); 
$(".js-view-count-work_16921591").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_16921591").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="16921591"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">12</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="48" rel="nofollow" href="https://www.academia.edu/Documents/in/Engineering">Engineering</a>, <script data-card-contents-for-ri="48" type="text/json">{"id":48,"name":"Engineering","url":"https://www.academia.edu/Documents/in/Engineering?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="923" rel="nofollow" href="https://www.academia.edu/Documents/in/Technology">Technology</a>, <script data-card-contents-for-ri="923" type="text/json">{"id":923,"name":"Technology","url":"https://www.academia.edu/Documents/in/Technology?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="13113" rel="nofollow" href="https://www.academia.edu/Documents/in/Evolutionary_Game_Theory">Evolutionary Game Theory</a><script data-card-contents-for-ri="13113" type="text/json">{"id":13113,"name":"Evolutionary Game Theory","url":"https://www.academia.edu/Documents/in/Evolutionary_Game_Theory?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=16921591]'), work: {"id":16921591,"title":"Dynamics of Network Selection in Heterogeneous Wireless Networks: An Evolutionary Game Approach","created_at":"2015-10-17T14:56:34.901-07:00","url":"https://www.academia.edu/16921591/Dynamics_of_Network_Selection_in_Heterogeneous_Wireless_Networks_An_Evolutionary_Game_Approach?f_ri=1688","dom_id":"work_16921591","summary":"Abstract Next-generation wireless networks will integrate multiple wireless access technologies to provide seamless mobility to mobile users with high-speed wireless connectivity. 
Title: Learning to Control a Low-Cost Manipulator using Data-Efficient Reinforcement Learning
URL: https://www.academia.edu/7281384/Learning_to_Control_a_Low_Cost_Manipulator_using_Data_Efficient_Reinforcement_Learning
Author: Marc Deisenroth (https://ucl.academia.edu/MarcDeisenroth)
Abstract: Over the last years, there has been substantial progress in robust manipulation in unstructured environments. The long-term goal of our work is to get away from precise, but very expensive, robotic systems and to develop affordable, potentially imprecise, self-adaptive manipulator systems that can interactively perform tasks such as playing with children. In this paper, we demonstrate how a low-cost, off-the-shelf robotic system can learn closed-loop policies for a stacking task in only a handful of trials, from scratch. Our manipulator is inaccurate and provides no pose feedback. For learning a controller in the work space of a Kinect-style depth camera, we use a model-based reinforcement learning technique. Our learning method is data efficient, reduces model bias, and deals with several noise sources in a principled way during long-term planning. We present a way of incorporating state-space constraints into the learning process and analyze the learning gain by exploiting the sequential structure of the stacking task.
Topics: Robotics, Reinforcement Learning, Machine Learning, Gaussian processes
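The abstract above describes a data-efficient, model-based approach: learn a probabilistic dynamics model from a few real trials, then evaluate controllers against the model instead of the robot. The Python sketch below is a rough illustration of that idea only, not the paper's method: it fits a Gaussian-process dynamics model to a handful of transitions from an invented 1-D positioning task and picks a linear feedback gain by rolling candidate policies out in the learned model.

```python
# Illustrative model-based RL sketch (not the paper's code): toy 1-D task,
# state = position error, action = motor command.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.default_rng(0)

def true_step(x, u):
    # Hypothetical unknown dynamics the real system would exhibit (noisy).
    return 0.9 * x + 0.5 * u + rng.normal(0.0, 0.01)

# 1) Collect a small batch of real transitions (a "handful of trials").
X, y = [], []
x = 1.0
for _ in range(30):
    u = rng.uniform(-1.0, 1.0)
    x_next = true_step(x, u)
    X.append([x, u]); y.append(x_next)
    x = x_next

# 2) Fit a GP dynamics model: x_{t+1} ~ GP(x_t, u_t).
model = GaussianProcessRegressor().fit(np.array(X), np.array(y))

# 3) Plan: score linear policies u = -k*x by rolling out the learned model
#    and keep the gain with the lowest predicted quadratic cost.
def predicted_cost(k, horizon=20):
    x, cost = 1.0, 0.0
    for _ in range(horizon):
        u = float(np.clip(-k * x, -1.0, 1.0))
        x = float(model.predict(np.array([[x, u]]))[0])
        cost += x ** 2
    return cost

best_k = min(np.linspace(0.0, 3.0, 31), key=predicted_cost)
print("selected gain:", best_k)
```

In a full data-efficient loop one would then execute the selected controller on the real system, append the new transitions to the dataset, and refit the model.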
Title: APRENDIZADO POR REFORÇO APLICADO AO MERCADO FINANCEIRO (Reinforcement Learning Applied to the Financial Market)
URL: https://www.academia.edu/40602958/APRENDIZADO_POR_REFOR%C3%87O_APLICADO_AO_MERCADO_FINANCEIRO
Author: Breno Brito (https://independent.academia.edu/BrenoBrito8)
Abstract: This work aims to show how an intelligent system based on reinforcement learning can benefit from classical financial indicators to outperform classic trading strategies in the stock market. Due to the non-linear, random, and non-stationary nature of financial markets, many classic strategies fail to exploit all of the opportunities in which they could be making a profit. To achieve this, a system was built in which only the buying strategy uses the SARSA reinforcement learning algorithm, while selling is handled by various classic strategies. For comparison, a system with an identical selling strategy was constructed in which purchases were based on a classic indicator. Results show that the intelligent algorithm achieved more stable returns, up to six times higher than those of the tested classical strategies.
Topics: Reinforcement Learning, Machine Learning, Stock Market, Algorithmic Trading
Title: A Multi-Tier Architecture for Data Analytics in Smart Metering Systems
URL: https://www.academia.edu/77443584/A_Multi_Tier_Architecture_for_Data_Analytics_in_Smart_Metering_Systems
Author: Juan Carlos Olivares Rojas (https://itmmerida.academia.edu/JuanCarlosOlivaresRojas)
Abstract: With the proliferation of smart meters in smart grids, new challenges have emerged in the energy sector, and applications are continuously being developed, mainly concerning data analytics, to address those challenges. Traditionally, data analytics in smart grid systems is performed in the server-side tier; however, it is necessary to process data analytics close to the smart meter to achieve better performance. To process data effectively, it is also necessary to implement methodologies that facilitate the integration of data analysis processes into the Advanced Metering Infrastructure (AMI). This paper presents a novel architecture for data analytics in smart metering systems based on an edge-fog-cloud computing architecture that permits different types of data analytics in a multi-tier context. The proposed architecture can learn and adapt to different contexts in smart metering systems using a reinforcement learning approach. The architecture was tested with three analytic applications: forecasting energy consumption, predicting power quality, and predicting energy theft. The results indicate that the methodology can be a feasible solution for direct implementation in smart metering systems.
Topics: Mechanical Engineering, Applied Mathematics, Reinforcement Learning, Data Analytics, Smart Metering, Elsevier
Title: Public Goods Game Simulator with Reinforcement Learning Agents
URL: https://www.academia.edu/511081/Public_Goods_Game_Simulator_with_Reinforcement_Learning_Agents
Author: Man Chon U (https://uga.academia.edu/ManChonU)
Abstract: The public goods game is a well-known game in game theory, and both extensive empirical studies and intensive theoretical analyses have been conducted worldwide to investigate its different scenarios. At the same time, computer game simulators are widely used to support game-theory research by providing simple but powerful visualization and statistics functionality. However, although solutions to the public goods game have been widely discussed through empirical studies and theoretical approaches, no computational, automatic simulation approach has been adopted. For this reason, we have implemented a computer simulator with a reinforcement learning agent module for the public goods game, and we have used this simulator to further study the game's characteristics. Furthermore, in this article we also present a set of experimental results on the strategies the agents used and the profits they earned.
Topics: Game Theory, Decision Making, Reinforcement Learning, User Interface, Public Good, Computer Simulation, Profitability, Theoretical Analysis, Empirical Study, Computer Game
Title: Packet routing in dynamically changing networks: A reinforcement learning approach
URL: https://www.academia.edu/1968660/Packet_routing_in_dynamically_changing_networks_A_reinforcement_learning_approach
Author: parashuram B S (https://vtu-in.academia.edu/parashuramBS)
Topics: Computer Science, Reinforcement Learning, Routing, Routing algorithm, Time Use, System on Chip, Network Routing, Local Community, Perforation, Network on chip, Network Topology, Shortest Path, Neural Information Processing, Intelligent Networks, Network on a Chip, Packet Routing
itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/28931422/An_Agent_based_Simulation_of_Power_Generation_Company_Behavior_in_Electricity_Markets_under_Different_Market_Clearing_Mechanisms">An Agent-based Simulation of Power Generation Company Behavior in Electricity Markets under Different Market-Clearing Mechanisms</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Deregulated electricity markets are expected to provide affordable electricity for consumers through promoting competition. Yet, the results do not always fulfill the expectations. The regulator's market-clearing mechanism is a strategic... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_28931422" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Deregulated electricity markets are expected to provide affordable electricity for consumers through promoting competition. Yet, the results do not always fulfill the expectations. The regulator's market-clearing mechanism is a strategic choice that may affect the level of competition in the market. We conceive of the market-clearing mechanism as composed of two components: pricing rules and rationing policies. We investigate the strategic behavior of power generation companies under different market-clearing mechanisms using an agent-based simulation model which integrates a game-theoretical understanding of the auction mechanism in the electricity market and generation companies' learning mechanism. Results of our simulation experiments are presented using various case studies representing different market settings. The market in simulations is observed to converge to a Nash equilibrium of the stage game or to a similar state under most parameter combinations. Compared to pay-as-bid pricing, bid prices are closer to marginal costs on average under uniform pricing while GenCos' total profit is also higher. The random rationing policy of the ISO turns out to be more successful in achieving lower bid prices and lower GenCo profits. 
In minimizing GenCos' total profit, a combination of pay-as-bid pricing rule and random rationing policy is observed to be the most promising.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/28931422" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="42546b7b28375fd513c202db2ba9e415" rel="nofollow" data-download="{"attachment_id":49367103,"asset_id":28931422,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/49367103/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="483651" href="https://helmholtz.academia.edu/DanialEsmaeiliAliabadi">Danial Esmaeili Aliabadi</a><script data-card-contents-for-user="483651" type="text/json">{"id":483651,"first_name":"Danial","last_name":"Esmaeili Aliabadi","domain_name":"helmholtz","page_name":"DanialEsmaeiliAliabadi","display_name":"Danial Esmaeili Aliabadi","profile_url":"https://helmholtz.academia.edu/DanialEsmaeiliAliabadi?f_ri=1688","photo":"https://0.academia-photos.com/483651/163878/18432075/s65_danial.esmaeili_aliabadi.jpg"}</script></span></span></li><li class="js-paper-rank-work_28931422 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="28931422"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 28931422, container: ".js-paper-rank-work_28931422", }); });</script></li><li class="js-percentile-work_28931422 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 28931422; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_28931422"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_28931422 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="28931422"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 28931422; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + 
window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=28931422]").text(description); $(".js-view-count-work_28931422").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_28931422").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="28931422"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">4</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="757" rel="nofollow" href="https://www.academia.edu/Documents/in/Game_Theory">Game Theory</a>, <script data-card-contents-for-ri="757" type="text/json">{"id":757,"name":"Game Theory","url":"https://www.academia.edu/Documents/in/Game_Theory?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="5337" rel="nofollow" href="https://www.academia.edu/Documents/in/Agent_Based_Simulation">Agent Based Simulation</a>, <script data-card-contents-for-ri="5337" type="text/json">{"id":5337,"name":"Agent Based Simulation","url":"https://www.academia.edu/Documents/in/Agent_Based_Simulation?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="55977" rel="nofollow" href="https://www.academia.edu/Documents/in/Pricing">Pricing</a><script data-card-contents-for-ri="55977" type="text/json">{"id":55977,"name":"Pricing","url":"https://www.academia.edu/Documents/in/Pricing?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=28931422]'), work: {"id":28931422,"title":"An Agent-based Simulation of Power Generation Company Behavior in Electricity Markets under Different Market-Clearing Mechanisms","created_at":"2016-10-05T01:28:29.745-07:00","url":"https://www.academia.edu/28931422/An_Agent_based_Simulation_of_Power_Generation_Company_Behavior_in_Electricity_Markets_under_Different_Market_Clearing_Mechanisms?f_ri=1688","dom_id":"work_28931422","summary":"Deregulated electricity markets are expected to provide affordable electricity for consumers through promoting competition. Yet, the results do not always fulfill the expectations. The regulator's market-clearing mechanism is a strategic choice that may affect the level of competition in the market. We conceive of the market-clearing mechanism as composed of two components: pricing rules and rationing policies. We investigate the strategic behavior of power generation companies under different market-clearing mechanisms using an agent-based simulation model which integrates a game-theoretical understanding of the auction mechanism in the electricity market and generation companies' learning mechanism. Results of our simulation experiments are presented using various case studies representing different market settings. The market in simulations is observed to converge to a Nash equilibrium of the stage game or to a similar state under most parameter combinations. 
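The study above couples an agent-based market with GenCos that learn how to bid. As a generic stand-in, not the authors' model, the sketch below has two generators repeatedly choose bid prices with an epsilon-greedy value update in a single-unit, pay-as-bid auction with random tie-breaking; the costs, bid grid, and reward rule are invented.

```python
# Toy repeated single-unit electricity auction with learning bidders (illustrative only).
import random
from collections import defaultdict

BID_PRICES = [20, 30, 40, 50, 60]              # allowed bid levels ($/MWh)
MARGINAL_COST = {"genco1": 15, "genco2": 25}
ALPHA, EPSILON = 0.1, 0.1
Q = {g: defaultdict(float) for g in MARGINAL_COST}

def choose_bid(g):
    if random.random() < EPSILON:
        return random.choice(BID_PRICES)
    return max(BID_PRICES, key=lambda b: Q[g][b])

for _ in range(20000):
    bids = {g: choose_bid(g) for g in MARGINAL_COST}
    low = min(bids.values())
    winner = random.choice([g for g, b in bids.items() if b == low])  # random tie-breaking
    for g, b in bids.items():
        profit = (b - MARGINAL_COST[g]) if g == winner else 0.0       # winner is paid its bid
        Q[g][b] += ALPHA * (profit - Q[g][b])                          # incremental payoff average

print({g: max(BID_PRICES, key=lambda b: Q[g][b]) for g in MARGINAL_COST})
```

Swapping the payment rule (uniform clearing price versus pay-as-bid) or the rationing rule for tied bids changes the learned bidding behavior, which is essentially the comparison the abstract describes.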
Compared to pay-as-bid pricing, bid prices are closer to marginal costs on average under uniform pricing while GenCos' total profit is also higher. The random rationing policy of the ISO turns out to be more successful in achieving lower bid prices and lower GenCo profits. In minimizing GenCos' total profit, a combination of pay-as-bid pricing rule and random rationing policy is observed to be the most promising.","downloadable_attachments":[{"id":49367103,"asset_id":28931422,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":483651,"first_name":"Danial","last_name":"Esmaeili Aliabadi","domain_name":"helmholtz","page_name":"DanialEsmaeiliAliabadi","display_name":"Danial Esmaeili Aliabadi","profile_url":"https://helmholtz.academia.edu/DanialEsmaeiliAliabadi?f_ri=1688","photo":"https://0.academia-photos.com/483651/163878/18432075/s65_danial.esmaeili_aliabadi.jpg"}],"research_interests":[{"id":757,"name":"Game Theory","url":"https://www.academia.edu/Documents/in/Game_Theory?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":5337,"name":"Agent Based Simulation","url":"https://www.academia.edu/Documents/in/Agent_Based_Simulation?f_ri=1688","nofollow":true},{"id":55977,"name":"Pricing","url":"https://www.academia.edu/Documents/in/Pricing?f_ri=1688","nofollow":true}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_76546906" data-work_id="76546906" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/76546906/Approximate_dynamic_programming_for_an_inventory_problem_Empirical_comparison">Approximate dynamic programming for an inventory problem: Empirical comparison</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">This study investigates the application of learning-based and simulation-based Approximate Dynamic Programming (ADP) approaches to an inventory problem under the Generalized Autoregressive Conditional Heteroscedasticity (GARCH) model.... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_76546906" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">This study investigates the application of learning-based and simulation-based Approximate Dynamic Programming (ADP) approaches to an inventory problem under the Generalized Autoregressive Conditional Heteroscedasticity (GARCH) model. Specifically, we explore the robustness of a learning-based ADP method, Sarsa, with a GARCH(1,1) demand model, and provide empirical comparison between Sarsa and two simulation-based ADP methods: Rollout and Hindsight Optimization (HO). Our findings assuage a concern regarding the effect of GARCH(1,1) latent state variables on learning-based ADP and provide practical strategies to design an appropriate ADP method for inventory problems. In addition, we expose a relationship between ADP parameters and conservative behavior. 
Approximate dynamic programming for an inventory problem: Empirical comparison
by Edwin K P Chong — https://www.academia.edu/76546906/Approximate_dynamic_programming_for_an_inventory_problem_Empirical_comparison

This study investigates the application of learning-based and simulation-based Approximate Dynamic Programming (ADP) approaches to an inventory problem under the Generalized Autoregressive Conditional Heteroscedasticity (GARCH) model. Specifically, we explore the robustness of a learning-based ADP method, Sarsa, with a GARCH(1,1) demand model, and provide an empirical comparison between Sarsa and two simulation-based ADP methods: Rollout and Hindsight Optimization (HO). Our findings assuage a concern regarding the effect of GARCH(1,1) latent state variables on learning-based ADP and provide practical strategies to design an appropriate ADP method for inventory problems. In addition, we expose a relationship between ADP parameters and conservative behavior. Our empirical results are based on a variety of problem settings, including demand correlations, demand variances, and cost structures.

Research interests: Engineering, Reinforcement Learning, Inventory Control, Heterogeneity, Simulation, Approximate Dynamic Programming, Mathematical Sciences, Conditional Heteroscedasticity, GARCH Model
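For readers unfamiliar with learning-based ADP in this setting, the sketch below shows a generic tabular Sarsa loop on a toy inventory problem driven by a GARCH(1,1)-style demand generator. The cost parameters, demand level, and state discretization are illustrative assumptions, not the paper's experimental setup.

```python
# Toy sketch of learning-based ADP for inventory control: tabular Sarsa with
# epsilon-greedy ordering decisions. The GARCH(1,1) demand generator and the
# cost parameters are illustrative assumptions.
import random, math

MAX_INV, MAX_ORDER = 20, 10
HOLD_COST, STOCKOUT_COST, UNIT_COST, PRICE = 1.0, 4.0, 2.0, 5.0
alpha, gamma, eps = 0.1, 0.95, 0.1
Q = {}  # (inventory, order) -> value

def garch_demand(sigma2, prev_eps, omega=1.0, a1=0.2, b1=0.7):
    """GARCH(1,1) conditional-variance update; demand is a noisy level around 8."""
    sigma2 = omega + a1 * prev_eps ** 2 + b1 * sigma2
    eps_t = random.gauss(0.0, math.sqrt(sigma2))
    return max(0, round(8 + eps_t)), sigma2, eps_t

def act(inv):
    if random.random() < eps:
        return random.randrange(MAX_ORDER + 1)
    return max(range(MAX_ORDER + 1), key=lambda a: Q.get((inv, a), 0.0))

inv, sigma2, prev_eps = 10, 1.0, 0.0
a = act(inv)
for t in range(20000):
    demand, sigma2, prev_eps = garch_demand(sigma2, prev_eps)
    on_hand = min(MAX_INV, inv + a)
    sold = min(on_hand, demand)
    reward = (PRICE * sold - UNIT_COST * a - HOLD_COST * (on_hand - sold)
              - STOCKOUT_COST * max(0, demand - on_hand))
    next_inv = on_hand - sold
    next_a = act(next_inv)                                   # on-policy choice
    q, q_next = Q.get((inv, a), 0.0), Q.get((next_inv, next_a), 0.0)
    Q[(inv, a)] = q + alpha * (reward + gamma * q_next - q)  # Sarsa update
    inv, a = next_inv, next_a
```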
Studies on Hierarchical Reinforcement Learning in Multi-Agent Environment
by Alonso Marin — https://www.academia.edu/75189685/Studies_on_Hierarchical_Reinforcement_Learning_in_Multi_Agent_Environment

Reinforcement learning addresses the problem of learning to select actions in order to maximize an agent's performance in unknown environments. To scale reinforcement learning to complex real-world tasks, agents must be able to discover hierarchical structures within their learning and control systems. In this paper, the use of hierarchical reinforcement learning (HRL) to speed up the acquisition of cooperative multi-agent tasks is investigated, and a hierarchical multi-agent reinforcement learning (RL) framework and a hierarchical multi-agent RL algorithm called cooperative HRL are proposed. A fundamental property of the proposed approach is that it allows agents to learn coordination faster by sharing information at the level of cooperative subtasks, rather than attempting to learn coordination at the level of primitive actions. This approach can significantly speed up learning and make it more scalable with the number of agents.

Research interests: Reinforcement Learning, Control system, Hierarchical Structure
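The subtask-level coordination idea can be illustrated with a toy sketch: two agents keep values over high-level subtasks, conditioned on the partner's last subtask, rather than over joint primitive actions. The task, reward, and update rule below are invented stand-ins, not the cooperative HRL algorithm itself.

```python
# Toy sketch of coordinating at the subtask level: each agent's values are
# indexed by (own subtask, partner's last subtask) instead of joint primitive
# actions. Task and rewards are invented assumptions.
import random

SUBTASKS = ["collect", "deliver"]       # high-level options
alpha, eps = 0.2, 0.1
Q = {ag: {(m, p): 0.0 for m in SUBTASKS for p in SUBTASKS} for ag in ("A", "B")}

def choose(ag, partner_task):
    if random.random() < eps:
        return random.choice(SUBTASKS)
    return max(SUBTASKS, key=lambda m: Q[ag][(m, partner_task)])

def team_reward(task_a, task_b):
    # Cooperation pays off when the agents split the subtasks (toy assumption).
    return 1.0 if task_a != task_b else 0.0

last = {"A": "collect", "B": "collect"}
for episode in range(5000):
    task_a = choose("A", last["B"])
    task_b = choose("B", last["A"])
    r = team_reward(task_a, task_b)
    Q["A"][(task_a, last["B"])] += alpha * (r - Q["A"][(task_a, last["B"])])
    Q["B"][(task_b, last["A"])] += alpha * (r - Q["B"][(task_b, last["A"])])
    last = {"A": task_a, "B": task_b}
```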
Multi-Agent Reinforcement Learning for Network Selection and Resource Allocation in Heterogeneous multi-RAT Networks
by Aiman Erbad — https://www.academia.edu/76765828/Multi_Agent_Reinforcement_Learning_for_Network_Selection_and_Resource_Allocation_in_Heterogeneous_multi_RAT_Networks

The rapid proliferation of mobile devices, together with the boom in wireless applications, continues to grow daily. This motivates the exploitation of the wireless spectrum using multiple Radio Access Technologies (multi-RAT) and the development of innovative network selection techniques to cope with such intensive demand while improving Quality of Service (QoS). Thus, we propose a distributed framework for dynamic network selection at the edge level and resource allocation at the Radio Access Network (RAN) level, while taking into consideration diverse applications' characteristics. In particular, our framework employs a deep Multi-Agent Reinforcement Learning (DMARL) algorithm that aims to maximize the edge nodes' quality of experience while extending the battery lifetime of the nodes and leveraging adaptive compression schemes. Indeed, our framework enables data transfer from the network's edge nodes, with multi-RAT capabilities, to the cloud in a cost- and energy-efficient manner, while mainta...

Research interests: Computer Science, Reinforcement Learning
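As a rough illustration of distributed network (RAT) selection, and not the paper's DMARL framework, the sketch below has each edge node run an independent epsilon-greedy learner that trades off a throughput-like reward against an energy cost. The RAT characteristics and reward weights are invented.

```python
# Toy sketch of distributed RAT selection: independent epsilon-greedy learners,
# one per edge node, sharing congested channels. RAT parameters are assumptions.
import random

RATS = {"wifi": {"rate": 8.0, "energy": 1.0},
        "lte":  {"rate": 5.0, "energy": 2.0}}
NODES = ["n1", "n2", "n3"]
alpha, eps = 0.1, 0.1
Q = {n: {r: 0.0 for r in RATS} for n in NODES}

def reward(rat, load):
    # Shared channel: per-node rate shrinks with congestion; energy is a penalty.
    spec = RATS[rat]
    return spec["rate"] / load - 0.5 * spec["energy"]

for step in range(10000):
    choice = {n: (random.choice(list(RATS)) if random.random() < eps
                  else max(RATS, key=lambda r: Q[n][r])) for n in NODES}
    load = {r: sum(1 for n in NODES if choice[n] == r) for r in RATS}
    for n, r in choice.items():
        Q[n][r] += alpha * (reward(r, load[r]) - Q[n][r])

print({n: max(RATS, key=lambda r: Q[n][r]) for n in NODES})
```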
Assigning discounts in a marketing campaign by using reinforcement learning and neural networks
by Emilio Soria, Alberto Palomares, and José Martín D Guerrero — https://www.academia.edu/13777597/Assigning_discounts_in_a_marketing_campaign_by_using_reinforcement_learning_and_neural_networks

In this work, RL is used to find an optimal policy for a marketing campaign. Data show a complex characterization of state and action spaces. Two approaches are proposed to circumvent this problem. The first approach is based on the self-organizing map (SOM), which is used to aggregate states. The second approach uses a multilayer perceptron (MLP) to carry out a regression of the action-value function. The results indicate that both approaches can improve a targeted marketing campaign. Moreover, the SOM approach allows an intuitive interpretation of the results, and the MLP approach yields robust results with generalization capabilities.

Research interests: Marketing, Reinforcement Learning, Neural Networks, Neural Network, Mathematical Sciences, Multilayer Perceptron, Function approximation, Self Organized Map
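The second approach mentioned above, regressing the action-value function with an MLP, can be sketched as follows. This is a one-step (myopic) simplification on synthetic customer data, using scikit-learn's MLPRegressor as a stand-in for the paper's network; the state features, discount levels, and response model are invented.

```python
# Sketch: regress Q(customer_state, discount) with an MLP and pick the discount
# with the highest predicted value. Synthetic data; illustrative only.
import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.default_rng(0)
DISCOUNTS = [0.0, 0.1, 0.2]

# State: (past spend, recency). Reward: margin earned if the customer responds.
states = rng.uniform(0, 1, size=(2000, 2))
actions = rng.integers(0, len(DISCOUNTS), size=2000)
respond_prob = 0.2 + 0.5 * states[:, 0] * np.array(DISCOUNTS)[actions]
responded = rng.uniform(size=2000) < respond_prob
rewards = responded * (1.0 - np.array(DISCOUNTS)[actions])   # margin after discount

X = np.column_stack([states, np.array(DISCOUNTS)[actions]])
q_net = MLPRegressor(hidden_layer_sizes=(32,), max_iter=2000, random_state=0)
q_net.fit(X, rewards)      # one-step (myopic) action-value regression

customer = np.array([0.8, 0.3])
q_values = [q_net.predict(np.append(customer, d).reshape(1, -1))[0] for d in DISCOUNTS]
print("best discount:", DISCOUNTS[int(np.argmax(q_values))])
```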
<a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_81020921" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">In recent years some researchers have explored the use of reinforcement learning (RL) algorithms as key components in the solution of various natural language processing tasks. For instance, some of these algorithms leveraging deep neural learning have found their way into conversational systems. This paper reviews the state of the art of RL methods for their possible use for different problems of natural language processing, focusing primarily on conversational systems, mainly due to their growing relevance. We provide detailed descriptions of the problems as well as discussions of why RL is well-suited to solve them. Also, we analyze the advantages and limitations of these methods. Finally, we elaborate on promising research directions in natural language processing that might benefit from reinforcement learning.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/81020921" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="46a464b01f8bbac766e06694af397948" rel="nofollow" data-download="{"attachment_id":87208943,"asset_id":81020921,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/87208943/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="28727451" href="https://uni-hamburg.academia.edu/StefanWermter">Stefan Wermter</a><script data-card-contents-for-user="28727451" type="text/json">{"id":28727451,"first_name":"Stefan","last_name":"Wermter","domain_name":"uni-hamburg","page_name":"StefanWermter","display_name":"Stefan Wermter","profile_url":"https://uni-hamburg.academia.edu/StefanWermter?f_ri=1688","photo":"https://0.academia-photos.com/28727451/9817445/19723119/s65_stefan.wermter.jpg"}</script></span></span></li><li class="js-paper-rank-work_81020921 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="81020921"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 81020921, container: ".js-paper-rank-work_81020921", }); });</script></li><li class="js-percentile-work_81020921 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x 
work-percentile"></span></span><script>$(function () { var workId = 81020921; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_81020921"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_81020921 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="81020921"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 81020921; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=81020921]").text(description); $(".js-view-count-work_81020921").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_81020921").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="81020921"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">5</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="1432" rel="nofollow" href="https://www.academia.edu/Documents/in/Natural_Language_Processing">Natural Language Processing</a>, <script data-card-contents-for-ri="1432" type="text/json">{"id":1432,"name":"Natural Language Processing","url":"https://www.academia.edu/Documents/in/Natural_Language_Processing?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="33890" rel="nofollow" href="https://www.academia.edu/Documents/in/Language_Processing">Language Processing</a>, <script data-card-contents-for-ri="33890" type="text/json">{"id":33890,"name":"Language Processing","url":"https://www.academia.edu/Documents/in/Language_Processing?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="81182" rel="nofollow" href="https://www.academia.edu/Documents/in/Deep_Learning">Deep Learning</a><script data-card-contents-for-ri="81182" type="text/json">{"id":81182,"name":"Deep Learning","url":"https://www.academia.edu/Documents/in/Deep_Learning?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=81020921]'), work: {"id":81020921,"title":"Survey on reinforcement learning for language processing","created_at":"2022-06-08T09:06:33.024-07:00","url":"https://www.academia.edu/81020921/Survey_on_reinforcement_learning_for_language_processing?f_ri=1688","dom_id":"work_81020921","summary":"In recent years some researchers have explored the use of reinforcement learning (RL) algorithms as key components in the solution of various natural language processing tasks. 
For instance, some of these algorithms leveraging deep neural learning have found their way into conversational systems. This paper reviews the state of the art of RL methods for their possible use for different problems of natural language processing, focusing primarily on conversational systems, mainly due to their growing relevance. We provide detailed descriptions of the problems as well as discussions of why RL is well-suited to solve them. Also, we analyze the advantages and limitations of these methods. Finally, we elaborate on promising research directions in natural language processing that might benefit from reinforcement learning. ","downloadable_attachments":[{"id":87208943,"asset_id":81020921,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":28727451,"first_name":"Stefan","last_name":"Wermter","domain_name":"uni-hamburg","page_name":"StefanWermter","display_name":"Stefan Wermter","profile_url":"https://uni-hamburg.academia.edu/StefanWermter?f_ri=1688","photo":"https://0.academia-photos.com/28727451/9817445/19723119/s65_stefan.wermter.jpg"}],"research_interests":[{"id":1432,"name":"Natural Language Processing","url":"https://www.academia.edu/Documents/in/Natural_Language_Processing?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":33890,"name":"Language Processing","url":"https://www.academia.edu/Documents/in/Language_Processing?f_ri=1688","nofollow":true},{"id":81182,"name":"Deep Learning","url":"https://www.academia.edu/Documents/in/Deep_Learning?f_ri=1688","nofollow":true},{"id":2532663,"name":"Deep Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Deep_Reinforcement_Learning?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_80468234" data-work_id="80468234" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/80468234/AI_QMIX_Attention_and_Imagination_for_Dynamic_Multi_Agent_Reinforcement_Learning">AI-QMIX: Attention and Imagination for Dynamic Multi-Agent Reinforcement Learning</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Real world multi-agent tasks often involve varying types and quantities of agents and non-agent entities. Agents frequently do not know a priori how many other agents and non-agent entities they will need to interact with in order to... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_80468234" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Real world multi-agent tasks often involve varying types and quantities of agents and non-agent entities. Agents frequently do not know a priori how many other agents and non-agent entities they will need to interact with in order to complete a given task, requiring agents to generalize across a combinatorial number of task configurations with each potentially requiring different strategies. In this work, we tackle the problem of multi-agent reinforcement learning (MARL) in such dynamic scenarios. 
AI-QMIX: Attention and Imagination for Dynamic Multi-Agent Reinforcement Learning
by Shariq Iqbal — https://www.academia.edu/80468234/AI_QMIX_Attention_and_Imagination_for_Dynamic_Multi_Agent_Reinforcement_Learning

Real world multi-agent tasks often involve varying types and quantities of agents and non-agent entities. Agents frequently do not know a priori how many other agents and non-agent entities they will need to interact with in order to complete a given task, requiring agents to generalize across a combinatorial number of task configurations with each potentially requiring different strategies. In this work, we tackle the problem of multi-agent reinforcement learning (MARL) in such dynamic scenarios. We hypothesize that, while the optimal behaviors in these scenarios with varying quantities and types of agents/entities are diverse, they may share common patterns within sub-teams of agents that are combined to form team behavior. As such, we propose a method that can learn these sub-group relationships and how they can be combined, ultimately improving knowledge sharing and generalization across scenarios. This method, Attentive-Imaginative QMIX, extends QMIX for dynamic MARL in two way...

Research interests: Computer Science, Reinforcement Learning
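Since the paper extends QMIX, a brief sketch of the monotonic value mixing that QMIX-style methods rely on may help: per-agent utilities are combined by a state-conditioned mixing function with non-negative weights, so each agent's greedy action remains consistent with maximizing the team value. The hypernetwork, shapes, and random parameters below are toy stand-ins, not AI-QMIX.

```python
# Sketch of QMIX-style monotonic value mixing: per-agent Q-values are combined
# with state-conditioned, non-negative weights so dQ_tot/dQ_i >= 0.
# Parameters are random placeholders, not a trained model.
import numpy as np

rng = np.random.default_rng(0)
n_agents, state_dim, hidden = 3, 4, 8

# Hypernetwork parameters (random for the sketch): map state -> mixing weights.
W1_hyper = rng.normal(size=(state_dim, n_agents * hidden))
W2_hyper = rng.normal(size=(state_dim, hidden))

def mix(agent_qs, state):
    """Combine per-agent Q-values into Q_tot with non-negative mixing weights."""
    w1 = np.abs(state @ W1_hyper).reshape(n_agents, hidden)  # |.| enforces monotonicity
    w2 = np.abs(state @ W2_hyper)
    hidden_layer = np.maximum(agent_qs @ w1, 0.0)            # ReLU-style nonlinearity
    return float(hidden_layer @ w2)

state = rng.normal(size=state_dim)
agent_qs = np.array([0.2, 1.3, -0.4])       # chosen-action utilities of 3 agents
print("Q_tot:", mix(agent_qs, state))
```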
The results show that our proposed method is effective for strategy estimation .</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/40018465" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="2c34bb973893a05f446bad602ffef721" rel="nofollow" data-download="{"attachment_id":60213461,"asset_id":40018465,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/60213461/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="450360" href="https://tus.academia.edu/SongJuKim">Song-Ju Kim</a><script data-card-contents-for-user="450360" type="text/json">{"id":450360,"first_name":"Song-Ju","last_name":"Kim","domain_name":"tus","page_name":"SongJuKim","display_name":"Song-Ju Kim","profile_url":"https://tus.academia.edu/SongJuKim?f_ri=1688","photo":"https://0.academia-photos.com/450360/146537/38264094/s65_song-ju.kim.jpeg"}</script></span></span></li><li class="js-paper-rank-work_40018465 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="40018465"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 40018465, container: ".js-paper-rank-work_40018465", }); });</script></li><li class="js-percentile-work_40018465 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 40018465; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_40018465"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_40018465 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="40018465"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 40018465; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=40018465]").text(description); $(".js-view-count-work_40018465").attr('title', description).tooltip(); }); 
});</script></span><script>$(function() { $(".js-view-count-work_40018465").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="40018465"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">5</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2008" rel="nofollow" href="https://www.academia.edu/Documents/in/Machine_Learning">Machine Learning</a>, <script data-card-contents-for-ri="2008" type="text/json">{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="5025" rel="nofollow" href="https://www.academia.edu/Documents/in/Genetic_Programming">Genetic Programming</a>, <script data-card-contents-for-ri="5025" type="text/json">{"id":5025,"name":"Genetic Programming","url":"https://www.academia.edu/Documents/in/Genetic_Programming?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="5026" rel="nofollow" href="https://www.academia.edu/Documents/in/Genetic_Algorithms">Genetic Algorithms</a><script data-card-contents-for-ri="5026" type="text/json">{"id":5026,"name":"Genetic Algorithms","url":"https://www.academia.edu/Documents/in/Genetic_Algorithms?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=40018465]'), work: {"id":40018465,"title":"A Human Behavior Strategy Estimation Method Using Homology Search for Rock-Scissors- Paper Game","created_at":"2019-08-05T21:22:10.729-07:00","url":"https://www.academia.edu/40018465/A_Human_Behavior_Strategy_Estimation_Method_Using_Homology_Search_for_Rock_Scissors_Paper_Game?f_ri=1688","dom_id":"work_40018465","summary":"In our previous studies, we showed that the estimation of the rock-scissors-paper (RSP, janken) game strategy is effective for the prediction of a player's hand sign sequences. The purpose of this study is to propose a method to estimate the RSP game strategy in the basis of human personality in an RSP game. To estimate a player's strategy in the RSP game, it is effective to compare the player's hand sign sequence and the hand sign sequences given by various typical RSP strategies on the basis of similarity. In this study, we propose the method of using a homology search to calculate the similarity between sequences. 
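As a concrete illustration of the idea, the sketch below scores how well a player's hand-sign sequence aligns with sequences produced by candidate strategies and picks the best match. This is a minimal sketch of sequence-similarity scoring, not the authors' algorithm; the match/mismatch/gap scores and the candidate strategies are assumptions.

```python
# Global (Needleman-Wunsch) alignment score between two hand-sign sequences,
# used here to rank candidate RSP strategies by similarity to observed play.
def align_score(a, b, match=1, mismatch=-1, gap=-1):
    n, m = len(a), len(b)
    dp = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        dp[i][0] = i * gap
    for j in range(1, m + 1):
        dp[0][j] = j * gap
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            s = match if a[i - 1] == b[j - 1] else mismatch
            dp[i][j] = max(dp[i - 1][j - 1] + s, dp[i - 1][j] + gap, dp[i][j - 1] + gap)
    return dp[n][m]

player = "RPSSRPPSR"                    # observed hand signs (R/P/S)
strategies = {                          # hypothetical strategy-generated sequences
    "repeat_last": "RPSSRPPSS",
    "cycle_RPS":  "RPSRPSRPS",
}
best = max(strategies, key=lambda k: align_score(player, strategies[k]))
print(best)
```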
Machine Learning Methods Used in Disease Prediction
by Mustafa Oğuz — https://www.academia.edu/36392756/Machine_Learning_Methods_Used_in_Disease_Prediction
Advances in machine learning allow us to predict certain events before they happen. Diseases and deaths are among the most painful of those events for people all around the world. There are huge amounts of health data available that machine learning can use to predict the diseases a person is likely to develop. Sometimes it is possible to prevent diseases and even deaths if a patient takes precautions against them. So, it is possible to save millions of lives through predicting and preventing diseases and deaths using machine learning. In this paper, the concept of preventable diseases and deaths will be discussed. Then, studies that have been done in this field will be analyzed. In the end, the future potential and enablers of disease prediction will be examined.
Research interests: Bioinformatics, Artificial Intelligence, Reinforcement Learning, Machine Learning, Biotechnology, Biology, Support Vector Machines, Public Health, Futures Studies, Healthcare, Artificial Neural Networks, Healthcare Technology, Diagnosis, Early Diagnosis
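The survey does not describe a specific model, so the snippet below is only a generic sketch of the kind of pipeline it discusses: fitting a classifier on health-record features to flag a disease risk. The synthetic data, the feature interpretation, and the choice of logistic regression are all assumptions.

```python
# Generic disease-prediction sketch on synthetic data; illustrative only.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))                  # e.g. standardized age, BMI, blood pressure, glucose
risk = X @ np.array([0.8, 0.5, 0.6, 1.0])      # synthetic latent risk score
y = (risk + rng.normal(scale=0.5, size=500) > 0).astype(int)   # 1 = develops disease

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
clf = LogisticRegression().fit(X_tr, y_tr)
print("held-out accuracy:", clf.score(X_te, y_te))
```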
Totally model-free reinforcement learning by actor-critic Elman networks in non-Markovian domains
by Stuart Dreyfus — https://www.academia.edu/3111487/Totally_model_free_reinforcement_learning_by_actor_critic_Elman_networks_in_non_Markovian_domains
In this paper we describe how an actor-critic reinforcement learning agent in a non-Markovian domain finds an optimal sequence of actions in a totally model-free fashion; that is, the agent neither learns transitional probabilities and associated rewards, nor by how much the state space should be augmented so that the Markov property holds. In particular, we employ an Elman-type recurrent neural network to solve non-Markovian problems, since an Elman-type network is able to implicitly and automatically render the process Markovian.
Research interests: History, Reinforcement Learning, Operations Research, Neural Networks, Industrial Engineering, Stochastic processes, Learning, Recurrent Neural Network, State Space, Value Network, Intelligent Networks, Neural Network Model
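To make the architecture concrete, here is a minimal sketch, not the paper's network: an Elman-style recurrent layer keeps a context (hidden) state so an actor head and a critic head can condition on history rather than on a single non-Markovian observation. The layer sizes and the single TD/policy-gradient update shown are illustrative assumptions; a real agent would loop this over an environment.

```python
# Elman-style actor-critic sketch: recurrent context + one TD update. Illustrative only.
import numpy as np

rng = np.random.default_rng(0)
OBS, HID, ACT = 4, 8, 3
Wx = rng.normal(scale=0.1, size=(HID, OBS))    # input -> hidden
Wh = rng.normal(scale=0.1, size=(HID, HID))    # context -> hidden (the Elman recurrence)
Wpi = rng.normal(scale=0.1, size=(ACT, HID))   # actor head
wv = rng.normal(scale=0.1, size=HID)           # critic head

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def step(obs, h):
    """One recurrent forward pass: new hidden state, action probabilities, state value."""
    h_new = np.tanh(Wx @ obs + Wh @ h)
    return h_new, softmax(Wpi @ h_new), wv @ h_new

h = np.zeros(HID)                              # context units start empty
obs, next_obs = rng.normal(size=OBS), rng.normal(size=OBS)
reward, gamma, lr = 1.0, 0.95, 0.01
h, probs, v = step(obs, h)
action = rng.choice(ACT, p=probs)
_, _, v_next = step(next_obs, h)
delta = reward + gamma * v_next - v            # TD error drives both heads
wv += lr * delta * h                           # critic update
Wpi += lr * delta * ((np.eye(ACT)[action] - probs)[:, None] * h[None, :])  # actor update
print(probs)
```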
Learning by doing vs. learning from others in a principal-agent model
by Jasmina Arifovic — https://www.academia.edu/12102080/Learning_by_doing_vs_learning_from_others_in_a_principal_agent_model
We introduce learning in a principal-agent model of stochastic output sharing under moral hazard. Without knowing the agents' preferences and technology, the principal tries to learn the optimal agency contract. We implement two learning paradigms: social (learning from others) and individual (learning by doing). We use a social evolutionary learning algorithm (SEL) to represent social learning. Within the individual learning paradigm, we investigate the performance of reinforcement learning (RL), experience-weighted attraction learning (EWA), and individual evolutionary learning (IEL). Overall, our results show that learning in the principal-agent environment is very difficult. This is due to three main reasons: (1) the stochastic environment, (2) a discontinuity in the payoff space in a neighborhood of the optimal contract due to the participation constraint, and (3) incorrect evaluation of foregone payoffs in the sequential-game principal-agent setting. The first two factors apply to all learning algorithms we study, while the third is the main contributor to the failure of the EWA and IEL models. Social learning (SEL), especially combined with selective replication, is much more successful in achieving convergence to the optimal contract than the canonical versions of individual learning from the literature. A modified version of the IEL algorithm using realized payoff evaluation performs better than the other individual learning models; however, it still falls short of social learning's ability to converge to the optimal contract.
Research interests: Reinforcement Learning, Economic Theory, Social learning, Applied Economics, Learning, Moral Hazard, Learning by doing, Evolutionary Learning, Individual Learning
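For a sense of what the individual-learning (RL) baseline looks like in this kind of setting, here is an illustrative sketch only: a Roth-Erev style reinforcement learner choosing among a discrete grid of output-sharing contracts and updating choice propensities from realized payoffs. The toy payoff function and the contract grid are assumptions, not the paper's model.

```python
# Roth-Erev style reinforcement learning over contract choices; illustrative only.
import numpy as np

rng = np.random.default_rng(0)
shares = np.linspace(0.1, 0.9, 9)           # candidate agent output shares (assumed grid)
propensities = np.ones_like(shares)

def principal_payoff(share):
    """Toy stochastic payoff: a higher agent share induces effort but costs the principal."""
    effort = share
    output = effort + rng.normal(scale=0.3)  # stochastic output
    return (1.0 - share) * output

for t in range(2000):
    probs = propensities / propensities.sum()
    k = rng.choice(len(shares), p=probs)
    payoff = max(principal_payoff(shares[k]), 0.0)
    propensities[k] += payoff                # reinforce only the realized choice (no foregone payoffs)
print("most reinforced share:", shares[propensities.argmax()])
```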
Critical Roles for Anterior Insula and Dorsal Striatum in Punishment-Based Avoidance Learning
by Damian Justo and Virginie Czernecki — https://www.academia.edu/12800252/Critical_Roles_for_Anterior_Insula_and_Dorsal_Striatum_in_Punishment_Based_Avoidance_Learning
The division of human learning systems into reward and punishment opponent modules is still a debated issue. While the implication of ventral prefrontostriatal circuits in reward-based learning is well established, the neural underpinnings of punishment-based learning remain unclear. To elucidate the causal implication of brain regions that were related to punishment learning in a previous functional neuroimaging study, we tested the effects of brain damage on behavioral performance, using the same task contrasting monetary gains and losses. Cortical and subcortical candidate regions, the anterior insula and dorsal striatum, were assessed in patients presenting with brain tumors and Huntington disease, respectively. Both groups exhibited selective impairment of punishment-based learning. Computational modeling suggested complementary roles for these structures: the anterior insula might be involved in learning the negative value of loss-predicting cues, whereas the dorsal striatum might be involved in choosing between those cues so as to avoid the worst.
Research interests: Neuroscience, Cognitive Science, Neuropsychology, Reinforcement Learning, Computational Neuroscience, Magnetic Resonance Imaging, Avoidance motivation, Cognitive Neuroscience, FMRI, Brain Mapping, Cerebral Cortex, Punishment, Brain lesion, Atrophy, Oxygen, Neuron, Huntington disease, Neurosciences, Brain Neoplasms, Avoidance Learning
Computational modeling suggested complementary roles for these structures: the anterior insula might be involved in learning the negative value of loss-predicting cues, whereas the dorsal striatum might be involved in choosing between those cues so as to avoid the worst.","downloadable_attachments":[{"id":45927299,"asset_id":12800252,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":31875551,"first_name":"Damian","last_name":"Justo","domain_name":"sorbonne-fr","page_name":"DamianJusto","display_name":"Damian Justo","profile_url":"https://sorbonne-fr.academia.edu/DamianJusto?f_ri=1688","photo":"/images/s65_no_pic.png"},{"id":31662983,"first_name":"Virginie","last_name":"Czernecki","domain_name":"independent","page_name":"VirginieCzernecki","display_name":"Virginie Czernecki","profile_url":"https://independent.academia.edu/VirginieCzernecki?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":161,"name":"Neuroscience","url":"https://www.academia.edu/Documents/in/Neuroscience?f_ri=1688","nofollow":true},{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true},{"id":251,"name":"Neuropsychology","url":"https://www.academia.edu/Documents/in/Neuropsychology?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":5451,"name":"Computational Neuroscience","url":"https://www.academia.edu/Documents/in/Computational_Neuroscience?f_ri=1688"},{"id":6200,"name":"Magnetic Resonance Imaging","url":"https://www.academia.edu/Documents/in/Magnetic_Resonance_Imaging?f_ri=1688"},{"id":21477,"name":"Avoidance motivation","url":"https://www.academia.edu/Documents/in/Avoidance_motivation?f_ri=1688"},{"id":21548,"name":"Cognitive Neuroscience","url":"https://www.academia.edu/Documents/in/Cognitive_Neuroscience?f_ri=1688"},{"id":29917,"name":"FMRI","url":"https://www.academia.edu/Documents/in/FMRI?f_ri=1688"},{"id":52176,"name":"Brain Mapping","url":"https://www.academia.edu/Documents/in/Brain_Mapping?f_ri=1688"},{"id":78467,"name":"Cerebral Cortex","url":"https://www.academia.edu/Documents/in/Cerebral_Cortex?f_ri=1688"},{"id":128493,"name":"Punishment","url":"https://www.academia.edu/Documents/in/Punishment?f_ri=1688"},{"id":136475,"name":"Brain lesion","url":"https://www.academia.edu/Documents/in/Brain_lesion?f_ri=1688"},{"id":357850,"name":"Atrophy","url":"https://www.academia.edu/Documents/in/Atrophy?f_ri=1688"},{"id":380825,"name":"Oxygen","url":"https://www.academia.edu/Documents/in/Oxygen?f_ri=1688"},{"id":473565,"name":"Neuron","url":"https://www.academia.edu/Documents/in/Neuron?f_ri=1688"},{"id":662721,"name":"Huntington disease","url":"https://www.academia.edu/Documents/in/Huntington_disease?f_ri=1688"},{"id":1239755,"name":"Neurosciences","url":"https://www.academia.edu/Documents/in/Neurosciences?f_ri=1688"},{"id":1425045,"name":"Brain Neoplasms","url":"https://www.academia.edu/Documents/in/Brain_Neoplasms?f_ri=1688"},{"id":2451403,"name":"Avoidance Learning","url":"https://www.academia.edu/Documents/in/Avoidance_Learning?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_15872255" data-work_id="15872255" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a 
class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/15872255/Hierarchical_multi_agent_reinforcement_learning">Hierarchical multi-agent reinforcement learning</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">In this paper we investigate the use of hierarchical reinforcement learning to speed up the acquisition of cooperative multi-agent tasks. We extend the MAXQ framework to the multi-agent case. Each agent uses the same MAXQ hierarchy to... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_15872255" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">In this paper we investigate the use of hierarchical reinforcement learning to speed up the acquisition of cooperative multi-agent tasks. We extend the MAXQ framework to the multi-agent case. Each agent uses the same MAXQ hierarchy to decompose a task into sub-tasks. Learning is decentralized, with each agent learning three interrelated skills: how to perform subtasks, which order to do them in, and how to coordinate with other agents. Coordination skills among agents are learned by using joint actions at the highest level(s) of the hierarchy. The Q nodes at the highest level(s) of the hierarchy are configured to represent the joint task-action space among multiple agents. In this approach, each agent only knows what other agents are doing at the level of sub-tasks, and is unaware of lower level (primitive) actions. This hierarchical approach allows agents to learn coordination faster by sharing information at the level of sub-tasks, rather than attempting to learn coordination taking into account primitive joint state-action values. We apply this hierarchical multi-agent reinforcement learning algorithm to a complex AGV scheduling task and compare its performance and speed with other learning approaches, including flat multi-agent, single agent using MAXQ, selfish multiple agents using MAXQ (where each agent acts independently without communicating with the other agents), as well as several well-known AGV heuristics like "first come first serve", "highest queue first" and "nearest station first". We also compare the tradeoffs in learning speed vs. performance of modeling joint action values at multiple levels in the MAXQ hierarchy. 
* Currently at Agilent Technologies, CA.</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/15872255" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="44f271d6d91284db039bd8610e147d14" rel="nofollow" data-download="{"attachment_id":42845266,"asset_id":15872255,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/42845266/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="35044875" href="https://inrialpes.academia.edu/MGhavamzadeh">M. Ghavamzadeh</a><script data-card-contents-for-user="35044875" type="text/json">{"id":35044875,"first_name":"M.","last_name":"Ghavamzadeh","domain_name":"inrialpes","page_name":"MGhavamzadeh","display_name":"M. Ghavamzadeh","profile_url":"https://inrialpes.academia.edu/MGhavamzadeh?f_ri=1688","photo":"/images/s65_no_pic.png"}</script></span></span></li><li class="js-paper-rank-work_15872255 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="15872255"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 15872255, container: ".js-paper-rank-work_15872255", }); });</script></li><li class="js-percentile-work_15872255 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 15872255; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_15872255"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_15872255 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="15872255"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 15872255; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=15872255]").text(description); $(".js-view-count-work_15872255").attr('title', description).tooltip(); }); });</script></span><script>$(function() { 
$(".js-view-count-work_15872255").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="15872255"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">11</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="237" rel="nofollow" href="https://www.academia.edu/Documents/in/Cognitive_Science">Cognitive Science</a>, <script data-card-contents-for-ri="237" type="text/json">{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="45873" rel="nofollow" href="https://www.academia.edu/Documents/in/Multi_Agent_System">Multi Agent System</a>, <script data-card-contents-for-ri="45873" type="text/json">{"id":45873,"name":"Multi Agent System","url":"https://www.academia.edu/Documents/in/Multi_Agent_System?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="50027" rel="nofollow" href="https://www.academia.edu/Documents/in/Learning_Community">Learning Community</a><script data-card-contents-for-ri="50027" type="text/json">{"id":50027,"name":"Learning Community","url":"https://www.academia.edu/Documents/in/Learning_Community?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=15872255]'), work: {"id":15872255,"title":"Hierarchical multi-agent reinforcement learning","created_at":"2015-09-18T17:16:59.580-07:00","url":"https://www.academia.edu/15872255/Hierarchical_multi_agent_reinforcement_learning?f_ri=1688","dom_id":"work_15872255","summary":"In this paper we investigate the use of hierarchical reinforcement learning to speed up the acquisition of cooperative multi-agent tasks. We extend the MAXQ framework to the multi-agent case. Each agent uses the same MAXQ hierarchy to decompose a task into sub-tasks. Learning is decentralized, with each agent learning three interrelated skills: how to perform subtasks, which order to do them in, and how to coordinate with other agents. Coordination skills among agents are learned by using joint actions at the highest level(s) of the hierarchy. The Q nodes at the highest level(s) of the hierarchy are configured to represent the joint task-action space among multiple agents. In this approach, each agent only knows what other agents are doing at the level of sub-tasks, and is unaware of lower level (primitive) actions. This hierarchical approach allows agents to learn coordination faster by sharing information at the level of sub-tasks, rather than attempting to learn coordination taking into account primitive joint state-action values. 
Active learning using pre-clustering
(https://www.academia.edu/10666369/Active_learning_using_pre_clustering)
by Hieu Nguyen

The paper is concerned with two-class active learning. While the common approach for collecting data in active learning is to select samples close to the classification boundary, better performance can be achieved by taking the prior data distribution into account. The main contribution of the paper is a formal framework that incorporates clustering into active learning. The algorithm first constructs a classifier on the set of cluster representatives, and then propagates the classification decision to the other samples via a local noise model. The proposed model allows the most representative samples to be selected while avoiding repeated labeling of samples in the same cluster. During the active learning process, the clustering is adjusted using a coarse-to-fine strategy in order to balance the advantage of large clusters against the accuracy of the data representation. Experiments on image databases show a better performance of our algorithm compared to current methods.

Research interests: Reinforcement Learning, Active Learning, Markov Processes, Bias, Bayesian estimation, Variance, Data representation, Data Distribution, Image Database
The Frontiers of Deep Reinforcement Learning for Resource Management in Future Wireless HetNets: Techniques, Challenges, and Research Directions
(https://www.academia.edu/77440800/The_Frontiers_of_Deep_Reinforcement_Learning_for_Resource_Management_in_Future_Wireless_HetNets_Techniques_Challenges_and_Research_Directions)
by Ala Al-Fuqaha

Next generation wireless networks are expected to be extremely complex due to their massive heterogeneity in terms of the types of network architectures they incorporate, the types and numbers of smart IoT devices they serve, and the types of emerging applications they support. In such large-scale and heterogeneous networks (HetNets), radio resource allocation and management (RRAM) becomes one of the major challenges encountered during system design and deployment. In this context, emerging Deep Reinforcement Learning (DRL) techniques are expected to be one of the main enabling technologies to address RRAM in future wireless HetNets. In this paper, we conduct a systematic, in-depth, and comprehensive survey of the applications of DRL techniques in RRAM for next generation wireless networks. Towards this, we first overview the existing traditional RRAM methods and identify the limitations that motivate the use of DRL techniques in RRAM. Then, we provide a comprehensive review of the most widely used DRL algorithms for addressing RRAM problems, including value- and policy-based algorithms. The advantages, limitations, and use-cases for each algorithm are provided. We then conduct a comprehensive and in-depth literature review and classify existing related works based on both the radio resources they address and the type of wireless networks they investigate. To this end, we carefully identify the types of DRL algorithms utilized in each related work, the elements of these algorithms, and the main findings of each related work. Finally, we highlight important open challenges and provide insights into several future research directions in the context of DRL-based RRAM. This survey is intentionally designed to guide and stimulate more research endeavors towards building efficient and fine-grained DRL-based RRAM schemes for future wireless networks.

Research interests: Computer Science, Reinforcement Learning
Value Function Approximation
(https://www.academia.edu/37426450/Value_Function_Approximation)
by Dr. J. M. Ashfaque (MInstP)

Research interests: Computer Science, Reinforcement Learning
Approximation","created_at":"2018-09-16T14:51:14.503-07:00","url":"https://www.academia.edu/37426450/Value_Function_Approximation?f_ri=1688","dom_id":"work_37426450","summary":null,"downloadable_attachments":[{"id":57391629,"asset_id":37426450,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":2792810,"first_name":"Dr. J. M.","last_name":"Ashfaque (MInstP)","domain_name":"independentresearcher","page_name":"DrJMAshfaqueAMIMAMInstP","display_name":"Dr. J. M. Ashfaque (MInstP)","profile_url":"https://independentresearcher.academia.edu/DrJMAshfaqueAMIMAMInstP?f_ri=1688","photo":"https://0.academia-photos.com/2792810/914293/18370870/s65_dr._j._m..ashfaque_amima_minstp_.jpg"}],"research_interests":[{"id":422,"name":"Computer Science","url":"https://www.academia.edu/Documents/in/Computer_Science?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_28789345" data-work_id="28789345" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/28789345/Event_Triggered_Optimal_Adaptive_Control_Algorithm_for_Continuous_Time_Nonlinear_Systems">Event-Triggered Optimal Adaptive Control Algorithm for Continuous-Time Nonlinear Systems</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">This paper proposes a novel optimal adaptive event-triggered control algorithm for nonlinear continuous-time systems. The goal is to reduce the controller updates, by sampling the state only when an event is triggered to maintain... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_28789345" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">This paper proposes a novel optimal adaptive event-triggered<br />control algorithm for nonlinear continuous-time systems. The goal<br />is to reduce the controller updates, by sampling the state only<br />when an event is triggered to maintain stability and optimality.<br />The online algorithm is implemented based on an actor/critic<br />neural network structure. A critic neural network is used to<br />approximate the cost and an actor neural network is used to<br />approximate the optimal {event-triggered controller}. Since in the<br />algorithm proposed there are dynamics that exhibit continuous<br />evolutions described by ordinary differential equations and<br />instantaneous jumps or impulses, we will use an impulsive system<br />approach. A Lyapunov stability proof ensures that the closed-loop<br />system is asymptotically stable. Finally, we illustrate the effectiveness of the proposed solution compared to a time-triggered controller.<br /><br />Citation: Kyriakos G. Vamvoudakis. Event-triggered optimal adaptive control algorithm for continuous-time nonlinear systems. 
IEEE/CAA Journal of Automatica Sinica, 2014, 1(3): 282-293</div></div></div><ul class="InlineList u-ph0x u-fs13"><li class="InlineList-item logged_in_only"><div class="share_on_academia_work_button"><a class="academia_share Button Button--inverseBlue Button--sm js-bookmark-button" data-academia-share="Work/28789345" data-share-source="work_strip" data-spinner="small_white_hide_contents"><i class="fa fa-plus"></i><span class="work-strip-link-text u-ml1x" data-content="button_text">Bookmark</span></a></div></li><li class="InlineList-item"><div class="download"><a id="7bae42f25d81dbef16c3ad15e4211a44" rel="nofollow" data-download="{"attachment_id":49207570,"asset_id":28789345,"asset_type":"Work","always_allow_download":false,"track":null,"button_location":"work_strip","source":null,"hide_modal":null}" class="Button Button--sm Button--inverseGreen js-download-button prompt_button doc_download" href="https://www.academia.edu/attachments/49207570/download_file?st=MTc0MDYwMDQyMyw4LjIyMi4yMDguMTQ2&s=work_strip"><i class="fa fa-arrow-circle-o-down fa-lg"></i><span class="u-textUppercase u-ml1x" data-content="button_text">Download</span></a></div></li><li class="InlineList-item"><ul class="InlineList InlineList--bordered u-ph0x"><li class="InlineList-item InlineList-item--bordered"><span class="InlineList-item-text">by <span itemscope="itemscope" itemprop="author" itemtype="https://schema.org/Person"><a class="u-tcGrayDark u-fw700" data-has-card-for-user="53180586" href="https://independent.academia.edu/IEEECAAJAS">IEEE/CAA J. Autom. Sinica</a><script data-card-contents-for-user="53180586" type="text/json">{"id":53180586,"first_name":"IEEE/CAA","last_name":"J. Autom. Sinica","domain_name":"independent","page_name":"IEEECAAJAS","display_name":"IEEE/CAA J. Autom. 
Sinica","profile_url":"https://independent.academia.edu/IEEECAAJAS?f_ri=1688","photo":"https://0.academia-photos.com/53180586/14177798/22465909/s65_ieee_caa.j._autom._sinica.jpg"}</script></span></span></li><li class="js-paper-rank-work_28789345 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="28789345"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 28789345, container: ".js-paper-rank-work_28789345", }); });</script></li><li class="js-percentile-work_28789345 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 28789345; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_28789345"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_28789345 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="28789345"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 28789345; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=28789345]").text(description); $(".js-view-count-work_28789345").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_28789345").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="28789345"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">3</a> </div><span class="InlineList-item-text u-textTruncate u-pl9x"><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2200" rel="nofollow" href="https://www.academia.edu/Documents/in/Optimal_Control">Optimal Control</a>, <script data-card-contents-for-ri="2200" type="text/json">{"id":2200,"name":"Optimal Control","url":"https://www.academia.edu/Documents/in/Optimal_Control?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="8422" rel="nofollow" href="https://www.academia.edu/Documents/in/Adaptive_Control">Adaptive Control</a><script data-card-contents-for-ri="8422" type="text/json">{"id":8422,"name":"Adaptive Control","url":"https://www.academia.edu/Documents/in/Adaptive_Control?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=28789345]'), work: {"id":28789345,"title":"Event-Triggered Optimal Adaptive Control 
Cognitive Radio for 5G small-cell Communications
(https://www.academia.edu/8867992/Cognitive_Radio_for_5G_small_cell_Communications)
by Ogheneovie Ajemuta

This report focuses on the access network implementation of a 5G small-cell communication system using Cognitive Radio. 5G systems are the next major wireless communications standard, and Cognitive Radio is being heralded as a valid implementation method once the wireless communications industry finally settles on a fifth generation standard. The project is concerned with implementing a beyond-next-generation (5G) communications network based on Cognitive Radio. A functioning model is developed in MATLAB, the purpose of which is to show that Cognitive Radio-based 5G systems can deliver acceptable Quality-of-Service and energy efficiency levels. The access network implementation of the system involves channel assignment for the access links that provide network access to mobile subscribers; suitable channels to service call requests are determined based on cognition. The system performance is evaluated by estimating the system blocking and dropping probabilities and the percentage of energy saved.

Research interests: Telecommunications Engineering, Game Theory, Reinforcement Learning, Cognitive Radio Networks, Dynamic Spectrum Access, Access Networks, Energy Management, Spectrum Sensing, Wireless Backhaul, Cognitive Radios, Wireless Network Channel Assignments, Next generation wireless networks, Fifth Generation Wireless Technologies
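The evaluation metric mentioned in this abstract, call blocking probability, can be illustrated with the standard Erlang-B formula for a pool of channels; this is only a textbook baseline, and the report's cognition-based channel-assignment model is not reproduced here.

```python
# Erlang-B blocking probability for `channels` servers at `offered_load` erlangs,
# computed with the standard numerically stable recursion.
def erlang_b(offered_load, channels):
    b = 1.0
    for k in range(1, channels + 1):
        b = (offered_load * b) / (k + offered_load * b)
    return b

# Example (illustrative numbers): 10 erlangs offered to a 12-channel small cell.
if __name__ == "__main__":
    print(f"blocking probability: {erlang_b(10.0, 12):.4f}")
```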
The project is concerned with implementing a Beyond-Next-Generation (5G) communications network based on Cognitive Radio. A functioning model is developed in matlab, the purpose of which is to show that Cognitive Radio based 5G systems will give acceptable Quality-of-Service and energy efficiency levels.\nThe Access network implementation of the system involves channel assignment for the access links which provide network access to mobile subscribers. Suitable channels to service call requests are determined based on cognition. \nThe system performance is evaluated by estimating the system blocking and dropping probabilities and the percentage of energy saved. \n","downloadable_attachments":[{"id":35203598,"asset_id":8867992,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":17251517,"first_name":"Ogheneovie","last_name":"Ajemuta","domain_name":"york","page_name":"OgheneovieAjemuta","display_name":"Ogheneovie Ajemuta","profile_url":"https://york.academia.edu/OgheneovieAjemuta?f_ri=1688","photo":"https://0.academia-photos.com/17251517/5404861/6167238/s65_ogheneovie.ajemuta.jpg"}],"research_interests":[{"id":87,"name":"Telecommunications Engineering","url":"https://www.academia.edu/Documents/in/Telecommunications_Engineering?f_ri=1688","nofollow":true},{"id":757,"name":"Game Theory","url":"https://www.academia.edu/Documents/in/Game_Theory?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":6379,"name":"Cognitive Radio Networks","url":"https://www.academia.edu/Documents/in/Cognitive_Radio_Networks?f_ri=1688","nofollow":true},{"id":6380,"name":"Dynamic Spectrum Access","url":"https://www.academia.edu/Documents/in/Dynamic_Spectrum_Access?f_ri=1688"},{"id":29919,"name":"Access Networks","url":"https://www.academia.edu/Documents/in/Access_Networks?f_ri=1688"},{"id":36077,"name":"Energy Management","url":"https://www.academia.edu/Documents/in/Energy_Management?f_ri=1688"},{"id":41252,"name":"Spectrum Sensing","url":"https://www.academia.edu/Documents/in/Spectrum_Sensing?f_ri=1688"},{"id":77104,"name":"Wireless Backhaul","url":"https://www.academia.edu/Documents/in/Wireless_Backhaul?f_ri=1688"},{"id":161989,"name":"Cognitive Radios","url":"https://www.academia.edu/Documents/in/Cognitive_Radios?f_ri=1688"},{"id":1116907,"name":"Wireless Network Channel Assignments","url":"https://www.academia.edu/Documents/in/Wireless_Network_Channel_Assignments?f_ri=1688"},{"id":1162140,"name":"Next generation wireless networks","url":"https://www.academia.edu/Documents/in/Next_generation_wireless_networks?f_ri=1688"},{"id":1233593,"name":"Fifth Generation Wireless Technologies","url":"https://www.academia.edu/Documents/in/Fifth_Generation_Wireless_Technologies?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_79892301" data-work_id="79892301" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" href="https://www.academia.edu/79892301/EER_RL_Energy_Efficient_Routing_Based_on_Reinforcement_Learning">EER-RL: Energy-Efficient Routing Based on Reinforcement Learning</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Wireless sensor devices are the 
EER-RL: Energy-Efficient Routing Based on Reinforcement Learning (https://www.academia.edu/79892301/EER_RL_Energy_Efficient_Routing_Based_on_Reinforcement_Learning)
Wireless sensor devices are the backbone of the Internet of Things (IoT), enabling real-world objects and human beings to be connected to the Internet and to interact with each other to improve citizens' living conditions. However, IoT devices are memory- and power-constrained and cannot support computationally heavy applications, yet the routing task is what makes an object part of an IoT network, despite being a power-hungry task. Therefore, energy efficiency is a crucial factor to consider when designing a routing protocol for IoT wireless networks. In this paper, we propose EER-RL, an energy-efficient routing protocol based on reinforcement learning. Reinforcement learning (RL) allows devices to adapt to network changes, such as mobility and energy level, and to improve routing decisions. The performance of the proposed protocol is compared with other existing energy-efficient routing protocols, and the results show that the proposed protocol performs better in terms of ...
by Vially kazadi mutombo
Research interests: Computer Science, Reinforcement Learning, Mobile Information Systems
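The abstract does not give EER-RL's actual update rule, so the snippet below is only a generic illustration of the idea it describes: each node keeps Q-values over candidate next hops and biases routing toward energy-rich neighbours. The node/neighbour representation, the reward shape and all parameters are assumptions for the sketch, not the protocol's design.

```python
import random
from collections import defaultdict

ALPHA, GAMMA, EPSILON = 0.5, 0.9, 0.1

# Q[node][next_hop]: learned value of forwarding a packet through that neighbour.
Q = defaultdict(lambda: defaultdict(float))

def choose_next_hop(node, neighbours):
    """Epsilon-greedy next-hop selection over the node's current neighbours."""
    if random.random() < EPSILON:
        return random.choice(neighbours)
    return max(neighbours, key=lambda n: Q[node][n])

def reward_fn(residual_energy, hops_to_sink):
    # Assumed reward: favour neighbours with more residual energy that are
    # closer to the sink (energy and hop count would come from hello messages).
    return residual_energy - 0.1 * hops_to_sink

def update(node, hop, reward, hop_neighbours):
    """Standard Q-learning update applied after the packet is forwarded."""
    best_next = max((Q[hop][n] for n in hop_neighbours), default=0.0)
    Q[node][hop] += ALPHA * (reward + GAMMA * best_next - Q[node][hop])
```

A full protocol would also exchange residual-energy and hop-count information between nodes; the sketch only shows the learning step that such information would feed.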
Reinforcement learning for a biped robot to climb sloping surfaces (https://www.academia.edu/10235904/Reinforcement_learning_for_a_biped_robot_to_climb_sloping_surfaces)
A neural network mechanism is proposed to modify the gait of a biped robot that walks on sloping surfaces using sensory inputs. The robot climbs a sloping surface from a level surface with no a priori knowledge of the inclination of the surface. By training the neural network while the robot is walking, the robot adjusts its gait and finally forms a gait that is as stable as when it walks on the level surface. The neural network is trained by a reinforcement learning mechanism, while proportional and integral (PI) control is used for position control of the robot joints. Experiments of static and pseudo-dynamic learning are performed to show the validity of the proposed reinforcement learning mechanism.
by yuan zheng
Research interests: Mechanical Engineering, Reinforcement Learning, Hybrid Neural-robotic Systems, Biped Robot, Electrical And Electronic Engineering
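As a rough illustration of the control split described above (PI position control of the joints, plus a learned adjustment to the gait), here is a toy Python sketch. The gains, the single gait-offset parameter and the perturb-and-keep reinforcement scheme are inventions for the example and are not taken from the paper, which trains a neural network instead.

```python
import random

class PIController:
    """Proportional-integral position control of one joint, as in the abstract."""
    def __init__(self, kp, ki, dt):
        self.kp, self.ki, self.dt = kp, ki, dt
        self.integral = 0.0

    def torque(self, target, measured):
        error = target - measured
        self.integral += error * self.dt
        return self.kp * error + self.ki * self.integral

class GaitAdapter:
    """Toy stand-in for the reinforcement-trained network: perturb a gait
    offset each stride and keep the perturbation only if the stability
    reward improved."""
    def __init__(self, step=0.01):
        self.offset, self.step = 0.0, step
        self.best_reward = float("-inf")
        self.trial = 0.0

    def propose(self):
        self.trial = random.uniform(-self.step, self.step)
        return self.offset + self.trial

    def feedback(self, reward):
        if reward > self.best_reward:     # the perturbed gait was more stable
            self.best_reward = reward
            self.offset += self.trial
```

In a walking loop one would add `adapter.propose()` to the nominal joint target, track the joint with `PIController.torque`, and call `adapter.feedback(stability_reward)` after each stride.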
Sensitivity to financial rewards and impression management links to smartphone use and dependence (https://www.academia.edu/49677506/Sensitivity_to_financial_rewards_and_impression_management_links_to_smartphone_use_and_dependence)
Computational modeling and brain imaging studies suggest that sensitivity to rewards and behaviorist learning principles partly explain smartphone engagement patterns and, potentially, smartphone dependence. Responses to a questionnaire and observational measures of smartphone use were recorded for 121 university students. Each participant was also tested with a laboratory task of reward sensitivity and a test of verbal operant conditioning. Twenty-three percent of the sample had probable smartphone addiction. Using multivariate regression, smartphone use, particularly the number of instant messenger services employed, was shown to be significantly and independently predicted by reward sensitivity (a positive relationship) and by instrumental conditioning (a negative relationship). However, the latter association was driven by a subset of participants who developed declarative knowledge of the response-reinforcer contingency. This suggests a process of impression management driven by experimental demand characteristics, producing goal-directed instrumental behavior rather than habit-based learning. No other measures of smartphone use, including the self-report scale, were significantly associated with the experimental tasks. We conclude that stronger engagement with smartphones, in particular instant messenger services, may be linked to people being more sensitive to rewarding stimuli, suggestive of a motivational or learning mechanism. We propose that this mechanism could underlie problem smartphone use and dependence. It also potentially explains why some aspects of smartphone use, such as habitual actions, appear to be poorly measured by technology-use questionnaires. A serendipitous secondary finding confirmed that smartphone use reflected active self-presentation. Our 'conditioning' task induced this behavior in the laboratory and could be used in social-cognition experimental studies.
by Graham Pluck
Research interests: Cyberpsychology, Reinforcement Learning, Impression Management, Addiction (Psychology), Instant Messaging, Smartphones, Reward based learning, CONDUCTISMO, Behaviorism, Social Networking & Social Media, Social Media Addiction, Operant Conditioning
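For readers who want to see the analysis style the abstract names (multivariate regression of a smartphone-use measure on reward sensitivity and conditioning scores), a minimal sketch with fabricated data follows. The variable names, effect sizes and data are purely illustrative and say nothing about the study's actual dataset.

```python
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
n = 121                                    # sample size reported in the abstract
reward_sensitivity = rng.normal(size=n)
conditioning = rng.normal(size=n)
# Fabricated outcome purely for illustration: more instant-messenger services
# with higher reward sensitivity, fewer with stronger instrumental conditioning.
im_services = 3 + 0.8 * reward_sensitivity - 0.5 * conditioning + rng.normal(size=n)

X = sm.add_constant(np.column_stack([reward_sensitivity, conditioning]))
model = sm.OLS(im_services, X).fit()       # ordinary least squares, two predictors
print(model.summary())
```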
Coordinated Random Access for Industrial IoT With Correlated Traffic By Reinforcement-Learning (https://www.academia.edu/77808004/Coordinated_Random_Access_for_Industrial_IoT_With_Correlated_Traffic_By_Reinforcement_Learning)
We propose a coordinated random access scheme for industrial Internet-of-Things (IIoT) scenarios, with machine-type devices (MTDs) generating sporadic correlated traffic. This occurs, e.g., when external events trigger data generation at multiple MTDs simultaneously. Time is divided into frames, each split into slots, and each MTD randomly selects one slot for (re)transmission, with probability density functions (PDFs) specific to both the MTD and the number of the current retransmission. The PDFs are locally optimized to minimize the probability of packet collision. The optimization problem is modeled as a repeated Markov game with incomplete information, and the linear reward-inaction algorithm is used at each MTD, which provably converges to a deterministic (suboptimal) slot assignment. We compare our solution with both the slotted ALOHA and the min-max pairwise correlation random access schemes, showing that our approach achieves a higher network throughput at moderate traffic intensity.
by Stefano Tomasin
Research interests: Computer Science, Reinforcement Learning, arXiv
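The abstract names the linear reward-inaction learning automaton. The sketch below shows that update for a single MTD choosing a slot within a frame: probability mass moves toward the chosen slot only after a successful (collision-free) transmission, and nothing changes after a collision. The frame size and learning rate are arbitrary values chosen for the illustration.

```python
import random

SLOTS = 10                     # assumed slots per frame
B = 0.05                       # assumed learning rate
p = [1.0 / SLOTS] * SLOTS      # this MTD's slot-selection PDF

def pick_slot():
    """Draw a transmission slot according to the current PDF."""
    return random.choices(range(SLOTS), weights=p)[0]

def reward_inaction_update(slot, success):
    """Linear reward-inaction: reinforce the chosen slot only on success;
    leave the PDF untouched on a collision (the 'inaction' half)."""
    if not success:
        return
    for s in range(SLOTS):
        if s == slot:
            p[s] += B * (1.0 - p[s])
        else:
            p[s] -= B * p[s]
```

Because the increase on the chosen slot equals the total decrease on the others, the PDF stays normalized, and repeated successes drive it toward a deterministic slot choice, which is the convergence behaviour the abstract refers to.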
Refining Human Behavior Models in a Context-based Architecture (https://www.academia.edu/70060935/Refining_Human_Behavior_Models_in_a_Context_based_Architecture)
This paper describes an investigation into the refinement of context-based human behavior models through the use of experiential learning. Specifically, a tactical agent was endowed with a context-based control model developed through other means and tasked with a mission in a simulation. This simulation-based mission was employed to expose the agent to situations possibly not considered in the model's original construction. Reinforcement learning was used to evaluate and refine the performance of this agent to improve its effectiveness and generality. Introduction and Background: How one makes a decision when faced with a task can be described as that person's behaviour. The Oxford dictionary [1] defines "behaviour" as 'the actions or reactions of a person or animal in response to external or internal stimuli'. Human behaviours are, consequently, the actions or reactions of a human in response to some external or internal stimuli. The external stimuli include touch, smell, sight, ...
by Avelino Gonzalez
Research interests: Computer Science, Reinforcement Learning, Experiential Learning, Human behavior, Model development
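One way to picture the refinement loop the paper describes (a hand-built context-based controller whose choices are adjusted by reinforcement gathered from simulated missions) is sketched below. The states, actions, context rules and reward handling are invented for the illustration and are not the paper's architecture.

```python
import random
from collections import defaultdict

ACTIONS = ["advance", "take_cover", "retreat"]
ALPHA, EPSILON = 0.2, 0.2
Q = defaultdict(float)          # (state, action) -> value learned in simulation

def context_policy(state):
    """Stand-in for the hand-authored context-based behaviour model."""
    return "advance" if state == "clear" else "take_cover"

def act(state):
    """Follow the original model by default, but let learned values override it
    once simulated experience says another action works better in this context."""
    if random.random() < EPSILON:
        return random.choice(ACTIONS)
    default = context_policy(state)
    best = max(ACTIONS, key=lambda a: Q[(state, a)])
    return best if Q[(state, best)] > Q[(state, default)] else default

def learn(state, action, reward):
    """Simple running-average update from the mission outcome."""
    Q[(state, action)] += ALPHA * (reward - Q[(state, action)])
```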
Inventory management in supply chains: a reinforcement learning approach (https://www.academia.edu/1364819/Inventory_management_in_supply_chains_a_reinforcement_learning_approach)
A major issue in supply chain inventory management is the coordination of the inventory policies adopted by different supply chain actors, such as suppliers, manufacturers and distributors, so as to smooth material flow and minimize costs while responsively meeting customer demand. This paper presents an approach to manage inventory decisions at all stages of the supply chain in an integrated manner. It allows an inventory order policy to be determined, which is aimed at optimizing the performance of the whole supply chain. The approach consists of three techniques: (i) Markov decision processes (MDP), (ii) an artificial intelligence algorithm to solve MDPs, which is based on (iii) simulation modeling. In particular, the inventory problem is modeled as an MDP, and a reinforcement learning (RL) algorithm is used to determine a near-optimal inventory policy under an average reward criterion. RL is a simulation-based stochastic technique that proves very efficient, particularly when the MDP size is large.
by Ilaria Giannoccaro
Research interests: Reinforcement Learning, Production, Markov Decision Process, Supply Chain, Multidisciplinary, Production economics, Markov Decision Processes, Artificial Intelligent, Inventory Management, Simulation Model
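The abstract does not spell out the algorithm, so the following is only a compact sketch of an average-reward Q-learning (R-learning) update applied to a one-echelon inventory toy problem. The demand distribution, cost coefficients, state bounds and learning rates are assumptions for the example, not the paper's multi-stage model.

```python
import random
from collections import defaultdict

ALPHA, BETA = 0.1, 0.01
MAX_INV, MAX_ORDER = 20, 10
HOLD_COST, ORDER_COST, STOCKOUT_COST = 1.0, 2.0, 10.0

Q = defaultdict(float)       # (inventory_level, order_qty) -> action value
rho = 0.0                    # running estimate of the average reward (negative cost)

def step(inv, order):
    """Toy one-echelon dynamics: the order arrives, then random demand is served."""
    demand = random.randint(0, 8)
    inv = min(inv + order, MAX_INV)
    served = min(inv, demand)
    reward = -(HOLD_COST * (inv - served)
               + ORDER_COST * order
               + STOCKOUT_COST * (demand - served))
    return inv - served, reward

inv = 10
for _ in range(50_000):
    if random.random() < 0.1:                       # occasional exploration
        order, greedy = random.randint(0, MAX_ORDER), False
    else:
        order, greedy = max(range(MAX_ORDER + 1), key=lambda a: Q[(inv, a)]), True
    nxt, r = step(inv, order)
    best_next = max(Q[(nxt, a)] for a in range(MAX_ORDER + 1))
    best_cur = max(Q[(inv, a)] for a in range(MAX_ORDER + 1))
    Q[(inv, order)] += ALPHA * (r - rho + best_next - Q[(inv, order)])
    if greedy:                                       # update rho on greedy steps only
        rho += BETA * (r + best_next - best_cur - rho)
    inv = nxt

print("estimated average cost per period:", -rho)
```

The greedy policy read off the final Q-table plays the role of the near-optimal ordering policy the abstract mentions.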
RL is a simulation-based stochastic technique that proves very e$cient particularly when the MDP size is large.","downloadable_attachments":[{"id":51010293,"asset_id":1364819,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":384148,"first_name":"Ilaria","last_name":"Giannoccaro","domain_name":"poliba","page_name":"IlariaGiannoccaro","display_name":"Ilaria Giannoccaro","profile_url":"https://poliba.academia.edu/IlariaGiannoccaro?f_ri=1688","photo":"/images/s65_no_pic.png"}],"research_interests":[{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":3442,"name":"Production","url":"https://www.academia.edu/Documents/in/Production?f_ri=1688","nofollow":true},{"id":10919,"name":"Markov Decision Process","url":"https://www.academia.edu/Documents/in/Markov_Decision_Process?f_ri=1688","nofollow":true},{"id":24699,"name":"Supply Chain","url":"https://www.academia.edu/Documents/in/Supply_Chain?f_ri=1688","nofollow":true},{"id":28235,"name":"Multidisciplinary","url":"https://www.academia.edu/Documents/in/Multidisciplinary?f_ri=1688"},{"id":61714,"name":"Production economics","url":"https://www.academia.edu/Documents/in/Production_economics?f_ri=1688"},{"id":67961,"name":"Markov Decision Processes","url":"https://www.academia.edu/Documents/in/Markov_Decision_Processes?f_ri=1688"},{"id":101530,"name":"Artificial Intelligent","url":"https://www.academia.edu/Documents/in/Artificial_Intelligent?f_ri=1688"},{"id":117212,"name":"Inventory Management","url":"https://www.academia.edu/Documents/in/Inventory_Management?f_ri=1688"},{"id":1208732,"name":"Simulation Model","url":"https://www.academia.edu/Documents/in/Simulation_Model?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div><div class="u-borderBottom1 u-borderColorGrayLighter"><div class="clearfix u-pv7x u-mb0x js-work-card work_39921156" data-work_id="39921156" itemscope="itemscope" itemtype="https://schema.org/ScholarlyArticle"><div class="header"><div class="title u-fontSerif u-fs22 u-lineHeight1_3"><a class="u-tcGrayDarkest js-work-link" rel="nofollow" href="https://www.academia.edu/39921156/The_Effects_of_the_LDA_Topic_Model_on_Sentiment_Classification">The Effects of the LDA Topic Model on Sentiment Classification</a></div></div><div class="u-pb4x u-mt3x"><div class="summary u-fs14 u-fw300 u-lineHeight1_5 u-tcGrayDarkest"><div class="summarized">Online reviews are a feedback to the product and play a key role in improving the product to cater to consumers. Online reviews that rely heavily on manual categorization are time consuming and labor intensive.The recurrent neural network... <a class="more_link u-tcGrayDark u-linkUnstyled" data-container=".work_39921156" data-show=".complete" data-hide=".summarized" data-more-link-behavior="true" href="#">more</a></div><div class="complete hidden">Online reviews are a feedback to the product and play a key role in improving the product to cater to<br />consumers. Online reviews that rely heavily on manual categorization are time consuming and labor<br />intensive.The recurrent neural network in deep learning can process time series data, while the long and<br />short term memory network can process long time sequence data well. 
The Effects of the LDA Topic Model on Sentiment Classification
by International Journal on Soft Computing, Artificial Intelligence and Applications (IJSCAI)
https://www.academia.edu/39921156/The_Effects_of_the_LDA_Topic_Model_on_Sentiment_Classification

Online reviews are feedback on a product and play a key role in improving it to suit consumers, but categorizing them manually is time-consuming and labor-intensive. Recurrent neural networks can process time-series data, and long short-term memory (LSTM) networks handle long sequences well; this has been verified experimentally in natural language processing, machine translation, speech recognition and language modeling. The quality of the extracted features determines the results produced by the classification model. The LDA topic model adds prior and posterior knowledge when organizing the data so that its characteristics can be extracted efficiently, and applying these features to the classifier can improve accuracy and efficiency. Bidirectional LSTM networks are variants and extensions of recurrent neural networks, and the deep learning framework Keras, with TensorFlow as its backend, makes it convenient to build a bidirectional LSTM model, which provides strong technical support for the experiment. Using the LDA topic model to extract the keywords needed to train the neural network, and thereby strengthening the internal relationships between words, improves the learning efficiency of the model. Under the same experimental conditions, the results are better than those obtained with traditional word-frequency features.

Topics: Computer Science, Computer Engineering, Reinforcement Learning, Clustering and Classification Methods, Evaluation, Optimization techniques, Clustering Algorithms, Online Learning, Bayesian Learning, Fuzzy Logic Programming, Markov Chain Monte Carlo, Kernel Methods, Functional Logic Programming, Optimization Technology, Soft Computing (Reinforcement Learning), Logical Programming
.1007@
by Dr. Saurabh Shukla
https://www.academia.edu/40931596/_1007_at_

Internet-of-Things (IoT) devices generate large volumes of data that are processed, analysed and filtered by cloud data centres. IoT is becoming tremendously popular: the number of IoT devices worldwide is expected to reach 50.1 billion by 2020, and 30.7% of them will be deployed in healthcare. Transmitting and analysing this much data increases the response time of cloud computing, which in turn means high service latency for end users. The main requirement of IoT is low latency so that data can be transferred in real time, and the cloud alone cannot satisfy this QoS requirement: both the volume of data and factors related to internet connectivity may lead to high network latency in analysing and acting upon the data. The proposed research introduces a hybrid approach that combines fuzzy logic and reinforcement learning to improve service and network latency in healthcare IoT and cloud. It integrates healthcare IoT devices with the cloud and uses fog services through a Fuzzy Reinforcement Learning Data Packet Allocation (FRLDPA) algorithm. The proposed algorithm batches IoT workloads to minimize latency, manages the QoS of latency-critical workloads, and has the potential to automate reasoning and decision making in fog computing nodes.

Topics: Reinforcement Learning, Machine Learning, Fuzzy Logic, The Internet of Things, Neural Networks, Cloud Computing, Java Programming, Fog Computing
Decision Support for an Adversarial Game Environment Using Automatic Hint Generation
by John Stamper
https://www.academia.edu/39625661/Decision_Support_for_an_Adversarial_Game_Environment_Using_Automatic_Hint_Generation

The Hint Factory is a method of automatic hint generation that has been used to augment hints in a number of educational systems. Although previous implementations were done in domains with largely deterministic environments, the methods are inherently useful in stochastic environments with uncertainty. In this work, we explore the game Connect Four as a simple domain in which to give decision support under uncertainty, and we speculate how the implementation created could be extended to other domains, including simulated learning environments and advanced navigational tasks.

Topics: Reinforcement Learning, Intelligent Tutoring Systems, Educational Data Mining
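Hint Factory-style decision support ultimately amounts to pointing the learner toward the move whose successor state looks best under a learned value estimate. The snippet below is a generic, hypothetical sketch of that selection step only (not the authors' implementation); the value table, move labels and successor function are placeholders.

# Generic sketch of value-based hint selection: recommend the move whose
# successor state has the highest estimated value.
from typing import Callable, Dict, Hashable, Iterable, Tuple

def suggest_hint(state: Hashable,
                 successors: Callable[[Hashable], Iterable[Tuple[str, Hashable]]],
                 value: Dict[Hashable, float],
                 default: float = 0.0) -> str:
    """Return the move label leading to the highest-value successor state."""
    best_move, best_val = None, float("-inf")
    for move, nxt in successors(state):
        v = value.get(nxt, default)    # unseen states fall back to a default value
        if v > best_val:
            best_move, best_val = move, v
    return best_move

# Toy usage with hand-made values for three hypothetical successor states.
values = {"s_win_threat": 0.9, "s_neutral": 0.5, "s_losing": 0.1}
succ = lambda s: [("drop col 3", "s_win_threat"), ("drop col 0", "s_neutral"), ("drop col 6", "s_losing")]
print(suggest_hint("s0", succ, values))   # -> "drop col 3"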
Szepesvari","profile_url":"https://ualberta.academia.edu/CsabaSzepesvari?f_ri=1688","photo":"https://0.academia-photos.com/462434/152664/177623/s65_csaba.szepesvari.jpg"}</script></span></span></li><li class="js-paper-rank-work_630060 InlineList-item InlineList-item--bordered hidden"><span class="js-paper-rank-view hidden u-tcGrayDark" data-paper-rank-work-id="630060"><i class="u-m1x fa fa-bar-chart"></i><strong class="js-paper-rank"></strong></span><script>$(function() { new Works.PaperRankView({ workId: 630060, container: ".js-paper-rank-work_630060", }); });</script></li><li class="js-percentile-work_630060 InlineList-item InlineList-item--bordered hidden u-tcGrayDark"><span class="percentile-widget hidden"><span class="u-mr2x percentile-widget" style="display: none">•</span><span class="u-mr2x work-percentile"></span></span><script>$(function () { var workId = 630060; window.Academia.workPercentilesFetcher.queue(workId, function (percentileText) { var container = $(".js-percentile-work_630060"); container.find('.work-percentile').text(percentileText.charAt(0).toUpperCase() + percentileText.slice(1)); container.find('.percentile-widget').show(); container.find('.percentile-widget').removeClass('hidden'); }); });</script></li><li class="js-view-count-work_630060 InlineList-item InlineList-item--bordered hidden"><div><span><span class="js-view-count view-count u-mr2x" data-work-id="630060"><i class="fa fa-spinner fa-spin"></i></span><script>$(function () { var workId = 630060; window.Academia.workViewCountsFetcher.queue(workId, function (count) { var description = window.$h.commaizeInt(count) + " " + window.$h.pluralize(count, 'View'); $(".js-view-count[data-work-id=630060]").text(description); $(".js-view-count-work_630060").attr('title', description).tooltip(); }); });</script></span><script>$(function() { $(".js-view-count-work_630060").removeClass('hidden') })</script></div></li><li class="InlineList-item u-positionRelative" style="max-width: 250px"><div class="u-positionAbsolute" data-has-card-for-ri-list="630060"><i class="fa fa-tag InlineList-item-icon u-positionRelative"></i> <a class="InlineList-item-text u-positionRelative">43</a> </div><span class="InlineList-item-text u-textTruncate u-pl10x"><a class="InlineList-item-text" data-has-card-for-ri="237" rel="nofollow" href="https://www.academia.edu/Documents/in/Cognitive_Science">Cognitive Science</a>, <script data-card-contents-for-ri="237" type="text/json">{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="465" rel="nofollow" href="https://www.academia.edu/Documents/in/Artificial_Intelligence">Artificial Intelligence</a>, <script data-card-contents-for-ri="465" type="text/json">{"id":465,"name":"Artificial Intelligence","url":"https://www.academia.edu/Documents/in/Artificial_Intelligence?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="1688" rel="nofollow" href="https://www.academia.edu/Documents/in/Reinforcement_Learning">Reinforcement Learning</a>, <script data-card-contents-for-ri="1688" type="text/json">{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true}</script><a class="InlineList-item-text" data-has-card-for-ri="2008" rel="nofollow" href="https://www.academia.edu/Documents/in/Machine_Learning">Machine Learning</a><script data-card-contents-for-ri="2008" 
type="text/json">{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true}</script></span></li><script>(function(){ if (true) { new Aedu.ResearchInterestListCard({ el: $('*[data-has-card-for-ri-list=630060]'), work: {"id":630060,"title":"Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path","created_at":"2011-05-30T06:19:37.167-07:00","url":"https://www.academia.edu/630060/Learning_near_optimal_policies_with_Bellman_residual_minimization_based_fitted_policy_iteration_and_a_single_sample_path?f_ri=1688","dom_id":"work_630060","summary":null,"downloadable_attachments":[{"id":3406054,"asset_id":630060,"asset_type":"Work","always_allow_download":false}],"ordered_authors":[{"id":462434,"first_name":"Csaba","last_name":"Szepesvari","domain_name":"ualberta","page_name":"CsabaSzepesvari","display_name":"Csaba Szepesvari","profile_url":"https://ualberta.academia.edu/CsabaSzepesvari?f_ri=1688","photo":"https://0.academia-photos.com/462434/152664/177623/s65_csaba.szepesvari.jpg"}],"research_interests":[{"id":237,"name":"Cognitive Science","url":"https://www.academia.edu/Documents/in/Cognitive_Science?f_ri=1688","nofollow":true},{"id":465,"name":"Artificial Intelligence","url":"https://www.academia.edu/Documents/in/Artificial_Intelligence?f_ri=1688","nofollow":true},{"id":1688,"name":"Reinforcement Learning","url":"https://www.academia.edu/Documents/in/Reinforcement_Learning?f_ri=1688","nofollow":true},{"id":2008,"name":"Machine Learning","url":"https://www.academia.edu/Documents/in/Machine_Learning?f_ri=1688","nofollow":true},{"id":2950,"name":"Computational Modeling","url":"https://www.academia.edu/Documents/in/Computational_Modeling?f_ri=1688"},{"id":3243,"name":"Nonparametric Statistics","url":"https://www.academia.edu/Documents/in/Nonparametric_Statistics?f_ri=1688"},{"id":4456,"name":"Time Series","url":"https://www.academia.edu/Documents/in/Time_Series?f_ri=1688"},{"id":6177,"name":"Modeling","url":"https://www.academia.edu/Documents/in/Modeling?f_ri=1688"},{"id":42811,"name":"Mixing","url":"https://www.academia.edu/Documents/in/Mixing?f_ri=1688"},{"id":43131,"name":"Stochastic processes","url":"https://www.academia.edu/Documents/in/Stochastic_processes?f_ri=1688"},{"id":51181,"name":"Controllability","url":"https://www.academia.edu/Documents/in/Controllability?f_ri=1688"},{"id":51529,"name":"Bayesian Inference","url":"https://www.academia.edu/Documents/in/Bayesian_Inference?f_ri=1688"},{"id":59770,"name":"Trajectory","url":"https://www.academia.edu/Documents/in/Trajectory?f_ri=1688"},{"id":85262,"name":"Markov Chain Monte Carlo","url":"https://www.academia.edu/Documents/in/Markov_Chain_Monte_Carlo?f_ri=1688"},{"id":90194,"name":"Dynamic Bayesian Networks","url":"https://www.academia.edu/Documents/in/Dynamic_Bayesian_Networks?f_ri=1688"},{"id":102502,"name":"Computational Learning Theory","url":"https://www.academia.edu/Documents/in/Computational_Learning_Theory?f_ri=1688"},{"id":135913,"name":"State Space","url":"https://www.academia.edu/Documents/in/State_Space?f_ri=1688"},{"id":166781,"name":"Learning Theory","url":"https://www.academia.edu/Documents/in/Learning_Theory?f_ri=1688"},{"id":180204,"name":"Nonparametric Regression","url":"https://www.academia.edu/Documents/in/Nonparametric_Regression?f_ri=1688"},{"id":181785,"name":"Time Series Data","url":"https://www.academia.edu/Documents/in/Time_Series_Data?f_ri=1688"},{"id":181865,"name":"Policy 
Iteration","url":"https://www.academia.edu/Documents/in/Policy_Iteration?f_ri=1688"},{"id":197861,"name":"Domain Knowledge","url":"https://www.academia.edu/Documents/in/Domain_Knowledge?f_ri=1688"},{"id":234860,"name":"Steady state","url":"https://www.academia.edu/Documents/in/Steady_state?f_ri=1688"},{"id":274599,"name":"Bayesian Network","url":"https://www.academia.edu/Documents/in/Bayesian_Network?f_ri=1688"},{"id":349059,"name":"Decision Problem","url":"https://www.academia.edu/Documents/in/Decision_Problem?f_ri=1688"},{"id":389523,"name":"Exact Computation","url":"https://www.academia.edu/Documents/in/Exact_Computation?f_ri=1688"},{"id":392790,"name":"dynamic Bayesian network","url":"https://www.academia.edu/Documents/in/dynamic_Bayesian_network?f_ri=1688"},{"id":584683,"name":"Waste minimisation","url":"https://www.academia.edu/Documents/in/Waste_minimisation?f_ri=1688"},{"id":607461,"name":"Network structure","url":"https://www.academia.edu/Documents/in/Network_structure?f_ri=1688"},{"id":611183,"name":"Parameterization","url":"https://www.academia.edu/Documents/in/Parameterization?f_ri=1688"},{"id":694897,"name":"Dynamic Model of WSN","url":"https://www.academia.edu/Documents/in/Dynamic_Model_of_WSN?f_ri=1688"},{"id":741671,"name":"Markov Process","url":"https://www.academia.edu/Documents/in/Markov_Process?f_ri=1688"},{"id":820715,"name":"Data Consistency","url":"https://www.academia.edu/Documents/in/Data_Consistency?f_ri=1688"},{"id":868912,"name":"Dynamic System","url":"https://www.academia.edu/Documents/in/Dynamic_System?f_ri=1688"},{"id":886652,"name":"Graphical Model","url":"https://www.academia.edu/Documents/in/Graphical_Model?f_ri=1688"},{"id":903929,"name":"Batch Process","url":"https://www.academia.edu/Documents/in/Batch_Process?f_ri=1688"},{"id":1138319,"name":"Learning Methods","url":"https://www.academia.edu/Documents/in/Learning_Methods?f_ri=1688"},{"id":1340139,"name":"Dynamic Networks","url":"https://www.academia.edu/Documents/in/Dynamic_Networks?f_ri=1688"},{"id":1480215,"name":"Time varying","url":"https://www.academia.edu/Documents/in/Time_varying?f_ri=1688"},{"id":1561665,"name":"Minimization","url":"https://www.academia.edu/Documents/in/Minimization?f_ri=1688"},{"id":2413807,"name":"Generic model","url":"https://www.academia.edu/Documents/in/Generic_model?f_ri=1688"},{"id":2509332,"name":"steady state analysis","url":"https://www.academia.edu/Documents/in/steady_state_analysis?f_ri=1688"},{"id":2595821,"name":"Least squares method","url":"https://www.academia.edu/Documents/in/Least_squares_method?f_ri=1688"}]}, }) } })();</script></ul></li></ul></div></div></div><div class="u-taCenter Pagination"><ul class="pagination"><li class="next_page"><a href="/Documents/in/Reinforcement_Learning?after=50%2C630060" rel="next">Next</a></li><li class="last next"><a href="/Documents/in/Reinforcement_Learning?page=last">Last »</a></li></ul></div></div><div class="hidden-xs hidden-sm"><div class="u-pl6x"><div style="width: 300px;"><div class="panel panel-flat u-mt7x"><div class="panel-heading u-p5x"><div class="u-tcGrayDark u-taCenter u-fw700 u-textUppercase">Related Topics</div></div><ul class="list-group"><li class="list-group-item media_v2 u-mt0x u-p3x"><div class="media-body"><div class="u-tcGrayDarker u-fw700"><a class="u-tcGrayDarker" rel="nofollow" href="https://www.academia.edu/Documents/in/Sanctuaries_in_Ancient_Rome_and_Italy">Sanctuaries in Ancient Rome and Italy</a></div></div><div class="media-right media-middle"><a class="u-tcGreen u-textDecorationNone 
Related Topics: Sanctuaries in Ancient Rome and Italy, Colon cancer, Machine Learning, Malacology (Biology), Affective Neuroscience, Computational Neuroscience, Artificial Intelligence, Decision Making, Learning & Memory, Cognitive Science
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function 
t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);</script><script>window.AbTest = (function() { return { 'ab_test': (uniqueId, test_name, buckets) => { let override = new URLSearchParams(window.location.search).get(`ab_test[${test_name}]`); if ( override ) { return override; } const bucketNames = buckets.map((bucket) => { return typeof bucket === 'string' ? bucket : Object.keys(bucket)[0]; }); const weights = buckets.map((bucket) => { return typeof bucket === 'string' ? 1 : Object.values(bucket)[0]; }); const total = weights.reduce((sum, weight) => sum + weight); const hash = md5(`${uniqueId}${test_name}`); const hashNum = parseInt(hash.slice(-12), 16); let bucketPoint = total * (hashNum % 100000) / 100000; const bucket = bucketNames.find((_, i) => { if (weights[i] > bucketPoint) { return true; } bucketPoint -= weights[i]; return false; }); return bucket; } }; })();</script><div data-auto_select="false" data-client_id="331998490334-rsn3chp12mbkiqhl6e7lu2q0mlbu0f1b" data-landing_url="https://www.academia.edu/Documents/in/Reinforcement_Learning" data-login_uri="https://www.academia.edu/registrations/google_one_tap" data-moment_callback="onGoogleOneTapEvent" id="g_id_onload"></div><script>function onGoogleOneTapEvent(event) { var momentType = event.getMomentType(); var momentReason = null; if (event.isNotDisplayed()) { momentReason = event.getNotDisplayedReason(); } else if (event.isSkippedMoment()) { momentReason = event.getSkippedReason(); } else if (event.isDismissedMoment()) { momentReason = event.getDismissedReason(); } Aedu.arbitraryEvents.write('GoogleOneTapEvent', { moment_type: momentType, moment_reason: momentReason, }); }</script><script>(function() { var auvid = unescape( document.cookie .split(/; ?/) .find((s) => s.startsWith('auvid')) .substring(6)); var bucket = AbTest.ab_test(auvid, 'lo_ri_one_tap_google_sign_on', ['control', 'one_tap_google_sign_on']); if (bucket === 'control') return; var oneTapTag = document.createElement('script') oneTapTag.async = true oneTapTag.defer = true oneTapTag.src = 'https://accounts.google.com/gsi/client' document.body.appendChild(oneTapTag) })();</script></div></div></div> </div> <div class="bootstrap login"><div class="modal fade login-modal" id="login-modal"><div class="login-modal-dialog modal-dialog"><div class="modal-content"><div class="modal-header"><button class="close close" data-dismiss="modal" type="button"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title text-center"><strong>Log In</strong></h4></div><div class="modal-body"><div class="row"><div class="col-xs-10 col-xs-offset-1"><button class="btn btn-fb btn-lg btn-block btn-v-center-content" id="login-facebook-oauth-button"><svg style="float: left; width: 19px; line-height: 1em; margin-right: .3em;" aria-hidden="true" focusable="false" data-prefix="fab" data-icon="facebook-square" class="svg-inline--fa fa-facebook-square fa-w-14" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><path fill="currentColor" d="M400 32H48A48 48 0 0 0 0 80v352a48 48 0 0 0 48 48h137.25V327.69h-63V256h63v-54.64c0-62.15 37-96.48 93.67-96.48 27.14 0 55.52 4.84 55.52 4.84v61h-31.27c-30.81 0-40.42 19.12-40.42 38.73V256h68.78l-11 71.69h-57.78V480H400a48 48 0 0 0 48-48V80a48 48 0 0 0-48-48z"></path></svg><small><strong>Log in</strong> with <strong>Facebook</strong></small></button><br /><button class="btn 
btn-google btn-lg btn-block btn-v-center-content" id="login-google-oauth-button"><svg style="float: left; width: 22px; line-height: 1em; margin-right: .3em;" aria-hidden="true" focusable="false" data-prefix="fab" data-icon="google-plus" class="svg-inline--fa fa-google-plus fa-w-16" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M256,8C119.1,8,8,119.1,8,256S119.1,504,256,504,504,392.9,504,256,392.9,8,256,8ZM185.3,380a124,124,0,0,1,0-248c31.3,0,60.1,11,83,32.3l-33.6,32.6c-13.2-12.9-31.3-19.1-49.4-19.1-42.9,0-77.2,35.5-77.2,78.1S142.3,334,185.3,334c32.6,0,64.9-19.1,70.1-53.3H185.3V238.1H302.2a109.2,109.2,0,0,1,1.9,20.7c0,70.8-47.5,121.2-118.8,121.2ZM415.5,273.8v35.5H380V273.8H344.5V238.3H380V202.8h35.5v35.5h35.2v35.5Z"></path></svg><small><strong>Log in</strong> with <strong>Google</strong></small></button><br /><style type="text/css">.sign-in-with-apple-button { width: 100%; height: 52px; border-radius: 3px; border: 1px solid black; cursor: pointer; } .sign-in-with-apple-button > div { margin: 0 auto; / This centers the Apple-rendered button horizontally }</style><script src="https://appleid.cdn-apple.com/appleauth/static/jsapi/appleid/1/en_US/appleid.auth.js" type="text/javascript"></script><div class="sign-in-with-apple-button" data-border="false" data-color="white" id="appleid-signin"><span ="Sign Up with Apple" class="u-fs11"></span></div><script>AppleID.auth.init({ clientId: 'edu.academia.applesignon', scope: 'name email', redirectURI: 'https://www.academia.edu/sessions', state: "e49d56988024f8b89d28c511220d5235a593c5b6cbc11236344e6816bf96768b", });</script><script>// Hacky way of checking if on fast loswp if (window.loswp == null) { (function() { const Google = window?.Aedu?.Auth?.OauthButton?.Login?.Google; const Facebook = window?.Aedu?.Auth?.OauthButton?.Login?.Facebook; if (Google) { new Google({ el: '#login-google-oauth-button', rememberMeCheckboxId: 'remember_me', track: null }); } if (Facebook) { new Facebook({ el: '#login-facebook-oauth-button', rememberMeCheckboxId: 'remember_me', track: null }); } })(); }</script></div></div></div><div class="modal-body"><div class="row"><div class="col-xs-10 col-xs-offset-1"><div class="hr-heading login-hr-heading"><span class="hr-heading-text">or</span></div></div></div></div><div class="modal-body"><div class="row"><div class="col-xs-10 col-xs-offset-1"><form class="js-login-form" action="https://www.academia.edu/sessions" accept-charset="UTF-8" method="post"><input type="hidden" name="authenticity_token" value="IjGAKf-VCCv-RzlrCAOyVF-x9d41F0PB8vEW1UdruhjgURANHozGhASCicjM69NYI-sE1fckxyIi4ZNMgdYnqQ" autocomplete="off" /><div class="form-group"><label class="control-label" for="login-modal-email-input" style="font-size: 14px;">Email</label><input class="form-control" id="login-modal-email-input" name="login" type="email" /></div><div class="form-group"><label class="control-label" for="login-modal-password-input" style="font-size: 14px;">Password</label><input class="form-control" id="login-modal-password-input" name="password" type="password" /></div><input type="hidden" name="post_login_redirect_url" id="post_login_redirect_url" value="https://www.academia.edu/Documents/in/Reinforcement_Learning" autocomplete="off" /><div class="checkbox"><label><input type="checkbox" name="remember_me" id="remember_me" value="1" checked="checked" /><small style="font-size: 12px; margin-top: 2px; display: inline-block;">Remember me on this computer</small></label></div><br><input type="submit" name="commit" 
value="Log In" class="btn btn-primary btn-block btn-lg js-login-submit" data-disable-with="Log In" /></br></form><script>typeof window?.Aedu?.recaptchaManagedForm === 'function' && window.Aedu.recaptchaManagedForm( document.querySelector('.js-login-form'), document.querySelector('.js-login-submit') );</script><small style="font-size: 12px;"><br />or <a data-target="#login-modal-reset-password-container" data-toggle="collapse" href="javascript:void(0)">reset password</a></small><div class="collapse" id="login-modal-reset-password-container"><br /><div class="well margin-0x"><form class="js-password-reset-form" action="https://www.academia.edu/reset_password" accept-charset="UTF-8" method="post"><input type="hidden" name="authenticity_token" value="gqcNkz2Cs52UgSJ_HHaeRT9UPdoPFjuiXcb0YBzlkHtAx5233Jt9Mm5EktzYnv9JQw7M0c0lv0GN1nH52lgNyg" autocomplete="off" /><p>Enter the email address you signed up with and we'll email you a reset link.</p><div class="form-group"><input class="form-control" name="email" type="email" /></div><script src="https://recaptcha.net/recaptcha/api.js" async defer></script> <script> var invisibleRecaptchaSubmit = function () { var closestForm = function (ele) { var curEle = ele.parentNode; while (curEle.nodeName !== 'FORM' && curEle.nodeName !== 'BODY'){ curEle = curEle.parentNode; } return curEle.nodeName === 'FORM' ? curEle : null }; var eles = document.getElementsByClassName('g-recaptcha'); if (eles.length > 0) { var form = closestForm(eles[0]); if (form) { form.submit(); } } }; </script> <input type="submit" data-sitekey="6Lf3KHUUAAAAACggoMpmGJdQDtiyrjVlvGJ6BbAj" data-callback="invisibleRecaptchaSubmit" class="g-recaptcha btn btn-primary btn-block" value="Email me a link" value=""/> </form></div></div><script> require.config({ waitSeconds: 90 })(["https://a.academia-assets.com/assets/collapse-45805421cf446ca5adf7aaa1935b08a3a8d1d9a6cc5d91a62a2a3a00b20b3e6a.js"], function() { // from javascript_helper.rb $("#login-modal-reset-password-container").on("shown.bs.collapse", function() { $(this).find("input[type=email]").focus(); }); }); </script> </div></div></div><div class="modal-footer"><div class="text-center"><small style="font-size: 12px;">Need an account? 
<a rel="nofollow" href="https://www.academia.edu/signup">Click here to sign up</a></small></div></div></div></div></div></div><script>// If we are on subdomain or non-bootstrapped page, redirect to login page instead of showing modal (function(){ if (typeof $ === 'undefined') return; var host = window.location.hostname; if ((host === $domain || host === "www."+$domain) && (typeof $().modal === 'function')) { $("#nav_log_in").click(function(e) { // Don't follow the link and open the modal e.preventDefault(); $("#login-modal").on('shown.bs.modal', function() { $(this).find("#login-modal-email-input").focus() }).modal('show'); }); } })()</script> <div class="bootstrap" id="footer"><div class="footer-content clearfix text-center padding-top-7x" style="width:100%;"><ul class="footer-links-secondary footer-links-wide list-inline margin-bottom-1x"><li><a href="https://www.academia.edu/about">About</a></li><li><a href="https://www.academia.edu/press">Press</a></li><li><a href="https://www.academia.edu/documents">Papers</a></li><li><a href="https://www.academia.edu/topics">Topics</a></li><li><a href="https://www.academia.edu/journals">Academia.edu Journals</a></li><li><a rel="nofollow" href="https://www.academia.edu/hiring"><svg style="width: 13px; height: 13px;" aria-hidden="true" focusable="false" data-prefix="fas" data-icon="briefcase" class="svg-inline--fa fa-briefcase fa-w-16" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M320 336c0 8.84-7.16 16-16 16h-96c-8.84 0-16-7.16-16-16v-48H0v144c0 25.6 22.4 48 48 48h416c25.6 0 48-22.4 48-48V288H320v48zm144-208h-80V80c0-25.6-22.4-48-48-48H176c-25.6 0-48 22.4-48 48v48H48c-25.6 0-48 22.4-48 48v80h512v-80c0-25.6-22.4-48-48-48zm-144 0H192V96h128v32z"></path></svg> <strong>We're Hiring!</strong></a></li><li><a rel="nofollow" href="https://support.academia.edu/hc/en-us"><svg style="width: 12px; height: 12px;" aria-hidden="true" focusable="false" data-prefix="fas" data-icon="question-circle" class="svg-inline--fa fa-question-circle fa-w-16" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M504 256c0 136.997-111.043 248-248 248S8 392.997 8 256C8 119.083 119.043 8 256 8s248 111.083 248 248zM262.655 90c-54.497 0-89.255 22.957-116.549 63.758-3.536 5.286-2.353 12.415 2.715 16.258l34.699 26.31c5.205 3.947 12.621 3.008 16.665-2.122 17.864-22.658 30.113-35.797 57.303-35.797 20.429 0 45.698 13.148 45.698 32.958 0 14.976-12.363 22.667-32.534 33.976C247.128 238.528 216 254.941 216 296v4c0 6.627 5.373 12 12 12h56c6.627 0 12-5.373 12-12v-1.333c0-28.462 83.186-29.647 83.186-106.667 0-58.002-60.165-102-116.531-102zM256 338c-25.365 0-46 20.635-46 46 0 25.364 20.635 46 46 46s46-20.636 46-46c0-25.365-20.635-46-46-46z"></path></svg> <strong>Help Center</strong></a></li></ul><ul class="footer-links-tertiary list-inline margin-bottom-1x"><li class="small">Find new research papers in:</li><li class="small"><a href="https://www.academia.edu/Documents/in/Physics">Physics</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Chemistry">Chemistry</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Biology">Biology</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Health_Sciences">Health Sciences</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Ecology">Ecology</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Earth_Sciences">Earth Sciences</a></li><li class="small"><a 
href="https://www.academia.edu/Documents/in/Cognitive_Science">Cognitive Science</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Mathematics">Mathematics</a></li><li class="small"><a href="https://www.academia.edu/Documents/in/Computer_Science">Computer Science</a></li></ul></div></div><div class="DesignSystem" id="credit" style="width:100%;"><ul class="u-pl0x footer-links-legal list-inline"><li><a rel="nofollow" href="https://www.academia.edu/terms">Terms</a></li><li><a rel="nofollow" href="https://www.academia.edu/privacy">Privacy</a></li><li><a rel="nofollow" href="https://www.academia.edu/copyright">Copyright</a></li><li>Academia ©2025</li></ul></div><script> //<![CDATA[ window.detect_gmtoffset = true; window.Academia && window.Academia.set_gmtoffset && Academia.set_gmtoffset('/gmtoffset'); //]]> </script> <div id='overlay_background'></div> <div id='bootstrap-modal-container' class='bootstrap'></div> <div id='ds-modal-container' class='bootstrap DesignSystem'></div> <div id='full-screen-modal'></div> </div> </body> </html>