CINXE.COM

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–43 of 43 results for author: <span class="mathjax">Simchi-Levi, D</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Simchi-Levi%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Simchi-Levi, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Simchi-Levi%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Simchi-Levi, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18359">arXiv:2501.18359</a> <span> [<a href="https://arxiv.org/pdf/2501.18359">pdf</a>, <a href="https://arxiv.org/format/2501.18359">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Contextual Online Decision Making with Infinite-Dimensional Functional Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+H">Haichen Hu</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+R">Rui Ai</a>, <a href="/search/cs?searchtype=author&query=Bates%2C+S">Stephen Bates</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18359v1-abstract-short" style="display: inline;"> Contextual sequential decision-making problems play a crucial role in machine learning, encompassing a wide range of downstream applications such as bandits, sequential hypothesis testing and online risk control. These applications often require different statistical measures, including expectation, variance and quantiles. In this paper, we provide a universal admissible algorithm framework for de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18359v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18359v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18359v1-abstract-full" style="display: none;"> Contextual sequential decision-making problems play a crucial role in machine learning, encompassing a wide range of downstream applications such as bandits, sequential hypothesis testing and online risk control. These applications often require different statistical measures, including expectation, variance and quantiles. In this paper, we provide a universal admissible algorithm framework for dealing with all kinds of contextual online decision-making problems that directly learns the whole underlying unknown distribution instead of focusing on individual statistics. This is much more difficult because the dimension of the regression is uncountably infinite, and any existing linear contextual bandits algorithm will result in infinite regret. To overcome this issue, we propose an efficient infinite-dimensional functional regression oracle for contextual cumulative distribution functions (CDFs), where each data point is modeled as a combination of context-dependent CDF basis functions. Our analysis reveals that the decay rate of the eigenvalue sequence of the design integral operator governs the regression error rate and, consequently, the utility regret rate. Specifically, when the eigenvalue sequence exhibits a polynomial decay of order $\frac{1}纬\ge 1$, the utility regret is bounded by $\tilde{\mathcal{O}}\Big(T^{\frac{3纬+2}{2(纬+2)}}\Big)$. By setting $纬=0$, this recovers the existing optimal regret rate for contextual bandits with finite-dimensional regression and is optimal under a stronger exponential decay assumption. Additionally, we provide a numerical method to compute the eigenvalue sequence of the integral operator, enabling the practical implementation of our framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18359v1-abstract-full').style.display = 'none'; document.getElementById('2501.18359v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14155">arXiv:2501.14155</a> <span> [<a href="https://arxiv.org/pdf/2501.14155">pdf</a>, <a href="https://arxiv.org/format/2501.14155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning to Price with Resource Constraints: From Full Information to Machine-Learned Prices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ao%2C+R">Ruicheng Ao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jiashuo Jiang</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14155v1-abstract-short" style="display: inline;"> We study the dynamic pricing problem with knapsack, addressing the challenge of balancing exploration and exploitation under resource constraints. We introduce three algorithms tailored to different informational settings: a Boundary Attracted Re-solve Method for full information, an online learning algorithm for scenarios with no prior information, and an estimate-then-select re-solve algorithm t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14155v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14155v1-abstract-full" style="display: none;"> We study the dynamic pricing problem with knapsack, addressing the challenge of balancing exploration and exploitation under resource constraints. We introduce three algorithms tailored to different informational settings: a Boundary Attracted Re-solve Method for full information, an online learning algorithm for scenarios with no prior information, and an estimate-then-select re-solve algorithm that leverages machine-learned informed prices with known upper bound of estimation errors. The Boundary Attracted Re-solve Method achieves logarithmic regret without requiring the non-degeneracy condition, while the online learning algorithm attains an optimal $O(\sqrt{T})$ regret. Our estimate-then-select approach bridges the gap between these settings, providing improved regret bounds when reliable offline data is available. Numerical experiments validate the effectiveness and robustness of our algorithms across various scenarios. This work advances the understanding of online resource allocation and dynamic pricing, offering practical solutions adaptable to different informational structures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14155v1-abstract-full').style.display = 'none'; document.getElementById('2501.14155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12036">arXiv:2411.12036</a> <span> [<a href="https://arxiv.org/pdf/2411.12036">pdf</a>, <a href="https://arxiv.org/format/2411.12036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Econometrics">econ.EM</span> </div> </div> <p class="title is-5 mathjax"> Prediction-Guided Active Experiments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ao%2C+R">Ruicheng Ao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12036v2-abstract-short" style="display: inline;"> In this work, we introduce a new framework for active experimentation, the Prediction-Guided Active Experiment (PGAE), which leverages predictions from an existing machine learning model to guide sampling and experimentation. Specifically, at each time step, an experimental unit is sampled according to a designated sampling distribution, and the actual outcome is observed based on an experimental… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12036v2-abstract-full').style.display = 'inline'; document.getElementById('2411.12036v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12036v2-abstract-full" style="display: none;"> In this work, we introduce a new framework for active experimentation, the Prediction-Guided Active Experiment (PGAE), which leverages predictions from an existing machine learning model to guide sampling and experimentation. Specifically, at each time step, an experimental unit is sampled according to a designated sampling distribution, and the actual outcome is observed based on an experimental probability. Otherwise, only a prediction for the outcome is available. We begin by analyzing the non-adaptive case, where full information on the joint distribution of the predictor and the actual outcome is assumed. For this scenario, we derive an optimal experimentation strategy by minimizing the semi-parametric efficiency bound for the class of regular estimators. We then introduce an estimator that meets this efficiency bound, achieving asymptotic optimality. Next, we move to the adaptive case, where the predictor is continuously updated with newly sampled data. We show that the adaptive version of the estimator remains efficient and attains the same semi-parametric bound under certain regularity assumptions. Finally, we validate PGAE's performance through simulations and a semi-synthetic experiment using data from the US Census Bureau. The results underscore the PGAE framework's effectiveness and superiority compared to other existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12036v2-abstract-full').style.display = 'none'; document.getElementById('2411.12036v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05552">arXiv:2410.05552</a> <span> [<a href="https://arxiv.org/pdf/2410.05552">pdf</a>, <a href="https://arxiv.org/ps/2410.05552">ps</a>, <a href="https://arxiv.org/format/2410.05552">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimal Adaptive Experimental Design for Estimating Treatment Effect </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiachun Li</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yunxiao Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05552v3-abstract-short" style="display: inline;"> Given n experiment subjects with potentially heterogeneous covariates and two possible treatments, namely active treatment and control, this paper addresses the fundamental question of determining the optimal accuracy in estimating the treatment effect. Furthermore, we propose an experimental design that approaches this optimal accuracy, giving a (non-asymptotic) answer to this fundamental yet sti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05552v3-abstract-full').style.display = 'inline'; document.getElementById('2410.05552v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05552v3-abstract-full" style="display: none;"> Given n experiment subjects with potentially heterogeneous covariates and two possible treatments, namely active treatment and control, this paper addresses the fundamental question of determining the optimal accuracy in estimating the treatment effect. Furthermore, we propose an experimental design that approaches this optimal accuracy, giving a (non-asymptotic) answer to this fundamental yet still open question. The methodological contribution is listed as following. First, we establish an idealized optimal estimator with minimal variance as benchmark, and then demonstrate that adaptive experiment is necessary to achieve near-optimal estimation accuracy. Secondly, by incorporating the concept of doubly robust method into sequential experimental design, we frame the optimal estimation problem as an online bandit learning problem, bridging the two fields of statistical estimation and bandit learning. Using tools and ideas from both bandit algorithm design and adaptive statistical estimation, we propose a general low switching adaptive experiment framework, which could be a generic research paradigm for a wide range of adaptive experimental design. Through novel lower bound techniques for non-i.i.d. data, we demonstrate the optimality of our proposed experiment. Numerical result indicates that the estimation accuracy approaches optimal with as few as two or three policy updates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05552v3-abstract-full').style.display = 'none'; document.getElementById('2410.05552v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Delete unrelated figure, update new lower bound results</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19618">arXiv:2407.19618</a> <span> [<a href="https://arxiv.org/pdf/2407.19618">pdf</a>, <a href="https://arxiv.org/format/2407.19618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Econometrics">econ.EM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Experimenting on Markov Decision Processes with Local Treatments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shuze Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chonghuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19618v2-abstract-short" style="display: inline;"> Utilizing randomized experiments to evaluate the effect of short-term treatments on the short-term outcomes has been well understood and become the golden standard in industrial practice. However, as service systems become increasingly dynamical and personalized, much focus is shifting toward maximizing long-term cumulative outcomes, such as customer lifetime value, through lifetime exposure to in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19618v2-abstract-full').style.display = 'inline'; document.getElementById('2407.19618v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19618v2-abstract-full" style="display: none;"> Utilizing randomized experiments to evaluate the effect of short-term treatments on the short-term outcomes has been well understood and become the golden standard in industrial practice. However, as service systems become increasingly dynamical and personalized, much focus is shifting toward maximizing long-term cumulative outcomes, such as customer lifetime value, through lifetime exposure to interventions. To bridge this gap, we investigate the randomized experiments within dynamical systems modeled as Markov Decision Processes (MDPs). Our goal is to assess the impact of treatment and control policies on long-term cumulative rewards from relatively short-term observations. We first develop optimal inference techniques for assessing the effects of general treatment patterns. Furthermore, recognizing that many real-world treatments tend to be fine-grained and localized for practical efficiency and operational convenience, we then propose methods to harness this localized structure by sharing information on the non-targeted states. Our new estimator effectively overcomes the variance lower bound for general treatments while matching the more stringent lower bound incorporating the local treatment structure. Furthermore, our estimator can optimally achieve a linear reduction with the number of test arms for a major part of the variance. Finally, we explore scenarios with perfect knowledge of the control arm and design estimators that further improve inference efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19618v2-abstract-full').style.display = 'none'; document.getElementById('2407.19618v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17796">arXiv:2405.17796</a> <span> [<a href="https://arxiv.org/pdf/2405.17796">pdf</a>, <a href="https://arxiv.org/ps/2405.17796">ps</a>, <a href="https://arxiv.org/format/2405.17796">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Offline Oracle-Efficient Learning for Contextual MDPs via Layerwise Exploration-Exploitation Tradeoff </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qian%2C+J">Jian Qian</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Haichen Hu</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17796v1-abstract-short" style="display: inline;"> Motivated by the recent discovery of a statistical and computational reduction from contextual bandits to offline regression (Simchi-Levi and Xu, 2021), we address the general (stochastic) Contextual Markov Decision Process (CMDP) problem with horizon H (as known as CMDP with H layers). In this paper, we introduce a reduction from CMDPs to offline density estimation under the realizability assumpt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17796v1-abstract-full').style.display = 'inline'; document.getElementById('2405.17796v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17796v1-abstract-full" style="display: none;"> Motivated by the recent discovery of a statistical and computational reduction from contextual bandits to offline regression (Simchi-Levi and Xu, 2021), we address the general (stochastic) Contextual Markov Decision Process (CMDP) problem with horizon H (as known as CMDP with H layers). In this paper, we introduce a reduction from CMDPs to offline density estimation under the realizability assumption, i.e., a model class M containing the true underlying CMDP is provided in advance. We develop an efficient, statistically near-optimal algorithm requiring only O(HlogT) calls to an offline density estimation algorithm (or oracle) across all T rounds of interaction. This number can be further reduced to O(HloglogT) if T is known in advance. Our results mark the first efficient and near-optimal reduction from CMDPs to offline density estimation without imposing any structural assumptions on the model class. A notable feature of our algorithm is the design of a layerwise exploration-exploitation tradeoff tailored to address the layerwise structure of CMDPs. Additionally, our algorithm is versatile and applicable to pure exploration tasks in reward-free reinforcement learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17796v1-abstract-full').style.display = 'none'; document.getElementById('2405.17796v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09413">arXiv:2404.09413</a> <span> [<a href="https://arxiv.org/pdf/2404.09413">pdf</a>, <a href="https://arxiv.org/format/2404.09413">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Optimal Regret of Locally Private Linear Contextual Bandit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiachun Li</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yining Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09413v1-abstract-short" style="display: inline;"> Contextual bandit with linear reward functions is among one of the most extensively studied models in bandit and online learning research. Recently, there has been increasing interest in designing \emph{locally private} linear contextual bandit algorithms, where sensitive information contained in contexts and rewards is protected against leakage to the general public. While the classical linear co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09413v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09413v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09413v1-abstract-full" style="display: none;"> Contextual bandit with linear reward functions is among one of the most extensively studied models in bandit and online learning research. Recently, there has been increasing interest in designing \emph{locally private} linear contextual bandit algorithms, where sensitive information contained in contexts and rewards is protected against leakage to the general public. While the classical linear contextual bandit algorithm admits cumulative regret upper bounds of $\tilde O(\sqrt{T})$ via multiple alternative methods, it has remained open whether such regret bounds are attainable in the presence of local privacy constraints, with the state-of-the-art result being $\tilde O(T^{3/4})$. In this paper, we show that it is indeed possible to achieve an $\tilde O(\sqrt{T})$ regret upper bound for locally private linear contextual bandit. Our solution relies on several new algorithmic and analytical ideas, such as the analysis of mean absolute deviation errors and layered principal component regression in order to achieve small mean absolute deviation errors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09413v1-abstract-full').style.display = 'none'; document.getElementById('2404.09413v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11425">arXiv:2402.11425</a> <span> [<a href="https://arxiv.org/pdf/2402.11425">pdf</a>, <a href="https://arxiv.org/format/2402.11425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Online Multiple Testing: A Resource Allocation Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ao%2C+R">Ruicheng Ao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+F">Feng Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11425v4-abstract-short" style="display: inline;"> We consider the problem of sequentially conducting multiple experiments where each experiment corresponds to a hypothesis testing task. At each time point, the experimenter must make an irrevocable decision of whether to reject the null hypothesis (or equivalently claim a discovery) before the next experimental result arrives. The goal is to maximize the number of discoveries while maintaining a l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11425v4-abstract-full').style.display = 'inline'; document.getElementById('2402.11425v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11425v4-abstract-full" style="display: none;"> We consider the problem of sequentially conducting multiple experiments where each experiment corresponds to a hypothesis testing task. At each time point, the experimenter must make an irrevocable decision of whether to reject the null hypothesis (or equivalently claim a discovery) before the next experimental result arrives. The goal is to maximize the number of discoveries while maintaining a low error rate at all time points measured by Local False Discovery Rate (LFDR). We formulate the problem as an online knapsack problem with exogenous random budget replenishment. We start with general arrival distributions and show that a simple policy achieves a $O(\sqrt{T})$ regret. We complement the result by showing that such regret rate is in general not improvable. We then shift our focus to discrete arrival distributions. We find that many existing re-solving heuristics in the online resource allocation literature, albeit achieve bounded loss in canonical settings, may incur a $惟(\sqrt{T})$ or even a $惟(T)$ regret. With the observation that canonical policies tend to be too optimistic and over claim discoveries, we propose a novel policy that incorporates budget safety buffers. It turns out that a little more safety can greatly enhance efficiency -- small additional logarithmic buffers suffice to reduce the regret from $惟(\sqrt{T})$ or even $惟(T)$ to $O(\ln^2 T)$. From a practical perspective, we extend the policy to the scenario with continuous arrival distributions, time-dependent information structures, as well as unknown $T$. We conduct both synthetic experiments and empirical applications on a time series data from New York City taxi passengers to validate the performance of our proposed policies. Our results emphasize how effective policies should be designed in online resource allocation problems with exogenous budget replenishment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11425v4-abstract-full').style.display = 'none'; document.getElementById('2402.11425v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08224">arXiv:2401.08224</a> <span> [<a href="https://arxiv.org/pdf/2401.08224">pdf</a>, <a href="https://arxiv.org/format/2401.08224">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Privacy Preserving Adaptive Experiment Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiachun Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+K">Kaining Shi</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08224v4-abstract-short" style="display: inline;"> Adaptive experiment is widely adopted to estimate conditional average treatment effect (CATE) in clinical trials and many other scenarios. While the primary goal in experiment is to maximize estimation accuracy, due to the imperative of social welfare, it's also crucial to provide treatment with superior outcomes to patients, which is measured by regret in contextual bandit framework. These two ob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08224v4-abstract-full').style.display = 'inline'; document.getElementById('2401.08224v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08224v4-abstract-full" style="display: none;"> Adaptive experiment is widely adopted to estimate conditional average treatment effect (CATE) in clinical trials and many other scenarios. While the primary goal in experiment is to maximize estimation accuracy, due to the imperative of social welfare, it's also crucial to provide treatment with superior outcomes to patients, which is measured by regret in contextual bandit framework. These two objectives often lead to contrast optimal allocation mechanism. Furthermore, privacy concerns arise in clinical scenarios containing sensitive data like patients health records. Therefore, it's essential for the treatment allocation mechanism to incorporate robust privacy protection measures. In this paper, we investigate the tradeoff between loss of social welfare and statistical power in contextual bandit experiment. We propose a matched upper and lower bound for the multi-objective optimization problem, and then adopt the concept of Pareto optimality to mathematically characterize the optimality condition. Furthermore, we propose differentially private algorithms which still matches the lower bound, showing that privacy is "almost free". Additionally, we derive the asymptotic normality of the estimator, which is essential in statistical inference and hypothesis testing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08224v4-abstract-full').style.display = 'none'; document.getElementById('2401.08224v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Add a table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16528">arXiv:2311.16528</a> <span> [<a href="https://arxiv.org/pdf/2311.16528">pdf</a>, <a href="https://arxiv.org/format/2311.16528">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Utility Fairness in Contextual Dynamic Pricing with Demand Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yining Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16528v1-abstract-short" style="display: inline;"> This paper introduces a novel contextual bandit algorithm for personalized pricing under utility fairness constraints in scenarios with uncertain demand, achieving an optimal regret upper bound. Our approach, which incorporates dynamic pricing and demand learning, addresses the critical challenge of fairness in pricing strategies. We first delve into the static full-information setting to formulat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16528v1-abstract-full').style.display = 'inline'; document.getElementById('2311.16528v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16528v1-abstract-full" style="display: none;"> This paper introduces a novel contextual bandit algorithm for personalized pricing under utility fairness constraints in scenarios with uncertain demand, achieving an optimal regret upper bound. Our approach, which incorporates dynamic pricing and demand learning, addresses the critical challenge of fairness in pricing strategies. We first delve into the static full-information setting to formulate an optimal pricing policy as a constrained optimization problem. Here, we propose an approximation algorithm for efficiently and approximately computing the ideal policy. We also use mathematical analysis and computational studies to characterize the structures of optimal contextual pricing policies subject to fairness constraints, deriving simplified policies which lays the foundations of more in-depth research and extensions. Further, we extend our study to dynamic pricing problems with demand learning, establishing a non-standard regret lower bound that highlights the complexity added by fairness constraints. Our research offers a comprehensive analysis of the cost of fairness and its impact on the balance between utility and revenue maximization. This work represents a step towards integrating ethical considerations into algorithmic efficiency in data-driven dynamic pricing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16528v1-abstract-full').style.display = 'none'; document.getElementById('2311.16528v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.04341">arXiv:2304.04341</a> <span> [<a href="https://arxiv.org/pdf/2304.04341">pdf</a>, <a href="https://arxiv.org/ps/2304.04341">ps</a>, <a href="https://arxiv.org/format/2304.04341">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Regret Distribution in Stochastic Bandits: Optimal Trade-off between Expectation and Tail Risk </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zeyu Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+F">Feng Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.04341v1-abstract-short" style="display: inline;"> We study the trade-off between expectation and tail risk for regret distribution in the stochastic multi-armed bandit problem. We fully characterize the interplay among three desired properties for policy design: worst-case optimality, instance-dependent consistency, and light-tailed risk. We show how the order of expected regret exactly affects the decaying rate of the regret tail probability for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.04341v1-abstract-full').style.display = 'inline'; document.getElementById('2304.04341v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.04341v1-abstract-full" style="display: none;"> We study the trade-off between expectation and tail risk for regret distribution in the stochastic multi-armed bandit problem. We fully characterize the interplay among three desired properties for policy design: worst-case optimality, instance-dependent consistency, and light-tailed risk. We show how the order of expected regret exactly affects the decaying rate of the regret tail probability for both the worst-case and instance-dependent scenario. A novel policy is proposed to characterize the optimal regret tail probability for any regret threshold. Concretely, for any given $伪\in[1/2, 1)$ and $尾\in[0, 伪]$, our policy achieves a worst-case expected regret of $\tilde O(T^伪)$ (we call it $伪$-optimal) and an instance-dependent expected regret of $\tilde O(T^尾)$ (we call it $尾$-consistent), while enjoys a probability of incurring an $\tilde O(T^未)$ regret ($未\geq伪$ in the worst-case scenario and $未\geq尾$ in the instance-dependent scenario) that decays exponentially with a polynomial $T$ term. Such decaying rate is proved to be best achievable. Moreover, we discover an intrinsic gap of the optimal tail rate under the instance-dependent scenario between whether the time horizon $T$ is known a priori or not. Interestingly, when it comes to the worst-case scenario, this gap disappears. Finally, we extend our proposed policy design to (1) a stochastic multi-armed bandit setting with non-stationary baseline rewards, and (2) a stochastic linear bandit setting. Our results reveal insights on the trade-off between regret expectation and regret tail risk for both worst-case and instance-dependent scenarios, indicating that more sub-optimality and inconsistency leave space for more light-tailed risk of incurring a large regret, and that knowing the planning horizon in advance can make a difference on alleviating tail risks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.04341v1-abstract-full').style.display = 'none'; document.getElementById('2304.04341v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2206.02969</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.13099">arXiv:2209.13099</a> <span> [<a href="https://arxiv.org/pdf/2209.13099">pdf</a>, <a href="https://arxiv.org/format/2209.13099">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Mechanism Design for Blockchain Transaction Fee Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zishuo Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.13099v7-abstract-short" style="display: inline;"> In blockchain systems, the design of transaction fee mechanisms is essential for stability and satisfaction for both miners and users. A recent work has proven the impossibility of collusion-proof mechanisms that achieve both non-zero miner revenue and Dominating-Strategy-Incentive-Compatible (DSIC) for users. However, a positive miner revenue is important in practice to motivate miners. To addres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13099v7-abstract-full').style.display = 'inline'; document.getElementById('2209.13099v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.13099v7-abstract-full" style="display: none;"> In blockchain systems, the design of transaction fee mechanisms is essential for stability and satisfaction for both miners and users. A recent work has proven the impossibility of collusion-proof mechanisms that achieve both non-zero miner revenue and Dominating-Strategy-Incentive-Compatible (DSIC) for users. However, a positive miner revenue is important in practice to motivate miners. To address this challenge, we consider a Bayesian game setting and relax the DSIC requirement for users to Bayesian-Nash-Incentive-Compatibility (BNIC). In particular, we propose an auxiliary mechanism method that makes connections between BNIC and DSIC mechanisms. With the auxiliary mechanism method, we design a transaction fee mechanism (TFM) based on the multinomial logit (MNL) choice model, and prove that the TFM has both BNIC and collusion-proof properties with an asymptotic constant-factor approximation of optimal miner revenue for i.i.d. bounded valuations. Our result breaks the zero-revenue barrier while preserving truthfulness and collusion-proof properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13099v7-abstract-full').style.display = 'none'; document.getElementById('2209.13099v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">71 pages, Operations Research (2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.02969">arXiv:2206.02969</a> <span> [<a href="https://arxiv.org/pdf/2206.02969">pdf</a>, <a href="https://arxiv.org/format/2206.02969">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> A Simple and Optimal Policy Design with Safety against Heavy-Tailed Risk for Stochastic Bandits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zeyu Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+F">Feng Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.02969v6-abstract-short" style="display: inline;"> We study the stochastic multi-armed bandit problem and design new policies that enjoy both worst-case optimality for expected regret and light-tailed risk for regret distribution. Specifically, our policy design (i) enjoys the worst-case optimality for the expected regret at order $O(\sqrt{KT\ln T})$ and (ii) has the worst-case tail probability of incurring a regret larger than any $x>0$ being upp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02969v6-abstract-full').style.display = 'inline'; document.getElementById('2206.02969v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.02969v6-abstract-full" style="display: none;"> We study the stochastic multi-armed bandit problem and design new policies that enjoy both worst-case optimality for expected regret and light-tailed risk for regret distribution. Specifically, our policy design (i) enjoys the worst-case optimality for the expected regret at order $O(\sqrt{KT\ln T})$ and (ii) has the worst-case tail probability of incurring a regret larger than any $x>0$ being upper bounded by $\exp(-惟(x/\sqrt{KT}))$, a rate that we prove to be best achievable with respect to $T$ for all worst-case optimal policies. Our proposed policy achieves a delicate balance between doing more exploration at the beginning of the time horizon and doing more exploitation when approaching the end, compared to standard confidence-bound-based policies. We also enhance the policy design to accommodate the "any-time" setting where $T$ is unknown a priori, and prove equivalently desired policy performances as compared to the "fixed-time" setting with known $T$. Numerical experiments are conducted to illustrate the theoretical findings. We find that from a managerial perspective, our new policy design yields better tail distributions and is preferable than celebrated policies especially when (i) there is a risk of under-estimating the volatility profile, or (ii) there is a challenge of tuning policy hyper-parameters. We conclude by extending our proposed policy design to the stochastic linear bandit setting that leads to both worst-case optimality in terms of expected regret and light-tailed risk on the regret distribution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02969v6-abstract-full').style.display = 'none'; document.getElementById('2206.02969v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preliminary version appeared in NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.10919">arXiv:2111.10919</a> <span> [<a href="https://arxiv.org/pdf/2111.10919">pdf</a>, <a href="https://arxiv.org/format/2111.10919">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Offline Reinforcement Learning: Fundamental Barriers for Value Function Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Foster%2C+D+J">Dylan J. Foster</a>, <a href="/search/cs?searchtype=author&query=Krishnamurthy%2C+A">Akshay Krishnamurthy</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.10919v2-abstract-short" style="display: inline;"> We consider the offline reinforcement learning problem, where the aim is to learn a decision making policy from logged data. Offline RL -- particularly when coupled with (value) function approximation to allow for generalization in large or continuous state spaces -- is becoming increasingly relevant in practice, because it avoids costly and time-consuming online data collection and is well suited… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.10919v2-abstract-full').style.display = 'inline'; document.getElementById('2111.10919v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.10919v2-abstract-full" style="display: none;"> We consider the offline reinforcement learning problem, where the aim is to learn a decision making policy from logged data. Offline RL -- particularly when coupled with (value) function approximation to allow for generalization in large or continuous state spaces -- is becoming increasingly relevant in practice, because it avoids costly and time-consuming online data collection and is well suited to safety-critical domains. Existing sample complexity guarantees for offline value function approximation methods typically require both (1) distributional assumptions (i.e., good coverage) and (2) representational assumptions (i.e., ability to represent some or all $Q$-value functions) stronger than what is required for supervised learning. However, the necessity of these conditions and the fundamental limits of offline RL are not well understood in spite of decades of research. This led Chen and Jiang (2019) to conjecture that concentrability (the most standard notion of coverage) and realizability (the weakest representation condition) alone are not sufficient for sample-efficient offline RL. We resolve this conjecture in the positive by proving that in general, even if both concentrability and realizability are satisfied, any algorithm requires sample complexity polynomial in the size of the state space to learn a non-trivial policy. Our results show that sample-efficient offline reinforcement learning requires either restrictive coverage conditions or representation conditions that go beyond supervised learning, and highlight a phenomenon called over-coverage which serves as a fundamental barrier for offline value function approximation methods. A consequence of our results for reinforcement learning with linear function approximation is that the separation between online and offline RL can be arbitrarily large, even in constant dimension. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.10919v2-abstract-full').style.display = 'none'; document.getElementById('2111.10919v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for presentation at the Conference on Learning Theory (COLT) 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.00790">arXiv:2111.00790</a> <span> [<a href="https://arxiv.org/pdf/2111.00790">pdf</a>, <a href="https://arxiv.org/ps/2111.00790">ps</a>, <a href="https://arxiv.org/format/2111.00790">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Pricing and Demand Learning on a Large Network of Products: A PAC-Bayesian Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Keskin%2C+N+B">N. Bora Keskin</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Talwai%2C+P">Prem Talwai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.00790v3-abstract-short" style="display: inline;"> We consider a seller offering a large network of $N$ products over a time horizon of $T$ periods. The seller does not know the parameters of the products' linear demand model, and can dynamically adjust product prices to learn the demand model based on sales observations. The seller aims to minimize its pseudo-regret, i.e., the expected revenue loss relative to a clairvoyant who knows the underlyi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.00790v3-abstract-full').style.display = 'inline'; document.getElementById('2111.00790v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.00790v3-abstract-full" style="display: none;"> We consider a seller offering a large network of $N$ products over a time horizon of $T$ periods. The seller does not know the parameters of the products' linear demand model, and can dynamically adjust product prices to learn the demand model based on sales observations. The seller aims to minimize its pseudo-regret, i.e., the expected revenue loss relative to a clairvoyant who knows the underlying demand model. We consider a sparse set of demand relationships between products to characterize various connectivity properties of the product network. In particular, we study three different sparsity frameworks: (1) $L_0$ sparsity, which constrains the number of connections in the network, and (2) off-diagonal sparsity, which constrains the magnitude of cross-product price sensitivities, and (3) a new notion of spectral sparsity, which constrains the asymptotic decay of a similarity metric on network nodes. We propose a dynamic pricing-and-learning policy that combines the optimism-in-the-face-of-uncertainty and PAC-Bayesian approaches, and show that this policy achieves asymptotically optimal performance in terms of $N$ and $T$. We also show that in the case of spectral and off-diagonal sparsity, the seller can have a pseudo-regret linear in $N$, even when the network is dense. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.00790v3-abstract-full').style.display = 'none'; document.getElementById('2111.00790v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.14813">arXiv:2106.14813</a> <span> [<a href="https://arxiv.org/pdf/2106.14813">pdf</a>, <a href="https://arxiv.org/format/2106.14813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Offline Planning and Online Learning under Recovering Rewards </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zeyu Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+F">Feng Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.14813v2-abstract-short" style="display: inline;"> Motivated by emerging applications such as live-streaming e-commerce, promotions and recommendations, we introduce and solve a general class of non-stationary multi-armed bandit problems that have the following two features: (i) the decision maker can pull and collect rewards from up to $K\,(\ge 1)$ out of $N$ different arms in each time period; (ii) the expected reward of an arm immediately drops… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.14813v2-abstract-full').style.display = 'inline'; document.getElementById('2106.14813v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.14813v2-abstract-full" style="display: none;"> Motivated by emerging applications such as live-streaming e-commerce, promotions and recommendations, we introduce and solve a general class of non-stationary multi-armed bandit problems that have the following two features: (i) the decision maker can pull and collect rewards from up to $K\,(\ge 1)$ out of $N$ different arms in each time period; (ii) the expected reward of an arm immediately drops after it is pulled, and then non-parametrically recovers as the arm's idle time increases. With the objective of maximizing the expected cumulative reward over $T$ time periods, we design a class of ``Purely Periodic Policies'' that jointly set a period to pull each arm. For the proposed policies, we prove performance guarantees for both the offline problem and the online problems. For the offline problem when all model parameters are known, the proposed periodic policy obtains an approximation ratio that is at the order of $1-\mathcal O(1/\sqrt{K})$, which is asymptotically optimal when $K$ grows to infinity. For the online problem when the model parameters are unknown and need to be dynamically learned, we integrate the offline periodic policy with the upper confidence bound procedure to construct on online policy. The proposed online policy is proved to approximately have $\widetilde{\mathcal O}(N\sqrt{T})$ regret against the offline benchmark. Our framework and policy design may shed light on broader offline planning and online learning applications with non-stationary and recovering rewards. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.14813v2-abstract-full').style.display = 'none'; document.getElementById('2106.14813v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">v1 accepted by ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.07446">arXiv:2105.07446</a> <span> [<a href="https://arxiv.org/pdf/2105.07446">pdf</a>, <a href="https://arxiv.org/ps/2105.07446">ps</a>, <a href="https://arxiv.org/format/2105.07446">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sobolev Norm Learning Rates for Conditional Mean Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talwai%2C+P">Prem Talwai</a>, <a href="/search/cs?searchtype=author&query=Shameli%2C+A">Ali Shameli</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.07446v3-abstract-short" style="display: inline;"> We develop novel learning rates for conditional mean embeddings by applying the theory of interpolation for reproducing kernel Hilbert spaces (RKHS). We derive explicit, adaptive convergence rates for the sample estimator under the misspecifed setting, where the target operator is not Hilbert-Schmidt or bounded with respect to the input/output RKHSs. We demonstrate that in certain parameter regime… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07446v3-abstract-full').style.display = 'inline'; document.getElementById('2105.07446v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.07446v3-abstract-full" style="display: none;"> We develop novel learning rates for conditional mean embeddings by applying the theory of interpolation for reproducing kernel Hilbert spaces (RKHS). We derive explicit, adaptive convergence rates for the sample estimator under the misspecifed setting, where the target operator is not Hilbert-Schmidt or bounded with respect to the input/output RKHSs. We demonstrate that in certain parameter regimes, we can achieve uniform convergence rates in the output RKHS. We hope our analyses will allow the much broader application of conditional mean embeddings to more complex ML/RL settings involving infinite dimensional RKHSs and continuous state spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07446v3-abstract-full').style.display = 'none'; document.getElementById('2105.07446v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Appears in AISTATS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.03161">arXiv:2010.03161</a> <span> [<a href="https://arxiv.org/pdf/2010.03161">pdf</a>, <a href="https://arxiv.org/format/2010.03161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Model-Free Non-Stationary RL: Near-Optimal Regret and Applications in Multi-Agent RL and Inventory Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+W">Weichao Mao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kaiqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Ba%C5%9Far%2C+T">Tamer Ba艧ar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.03161v4-abstract-short" style="display: inline;"> We consider model-free reinforcement learning (RL) in non-stationary Markov decision processes. Both the reward functions and the state transition functions are allowed to vary arbitrarily over time as long as their cumulative variations do not exceed certain variation budgets. We propose Restarted Q-Learning with Upper Confidence Bounds (RestartQ-UCB), the first model-free algorithm for non-stati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03161v4-abstract-full').style.display = 'inline'; document.getElementById('2010.03161v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.03161v4-abstract-full" style="display: none;"> We consider model-free reinforcement learning (RL) in non-stationary Markov decision processes. Both the reward functions and the state transition functions are allowed to vary arbitrarily over time as long as their cumulative variations do not exceed certain variation budgets. We propose Restarted Q-Learning with Upper Confidence Bounds (RestartQ-UCB), the first model-free algorithm for non-stationary RL, and show that it outperforms existing solutions in terms of dynamic regret. Specifically, RestartQ-UCB with Freedman-type bonus terms achieves a dynamic regret bound of $\widetilde{O}(S^{\frac{1}{3}} A^{\frac{1}{3}} 螖^{\frac{1}{3}} H T^{\frac{2}{3}})$, where $S$ and $A$ are the numbers of states and actions, respectively, $螖>0$ is the variation budget, $H$ is the number of time steps per episode, and $T$ is the total number of time steps. We further present a parameter-free algorithm named Double-Restart Q-UCB that does not require prior knowledge of the variation budget. We show that our algorithms are \emph{nearly optimal} by establishing an information-theoretical lower bound of $惟(S^{\frac{1}{3}} A^{\frac{1}{3}} 螖^{\frac{1}{3}} H^{\frac{2}{3}} T^{\frac{2}{3}})$, the first lower bound in non-stationary RL. Numerical experiments validate the advantages of RestartQ-UCB in terms of both cumulative rewards and computational efficiency. We demonstrate the power of our results in examples of multi-agent RL and inventory control across related products. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03161v4-abstract-full').style.display = 'none'; document.getElementById('2010.03161v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A preliminary version of this work has appeared in ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.03104">arXiv:2010.03104</a> <span> [<a href="https://arxiv.org/pdf/2010.03104">pdf</a>, <a href="https://arxiv.org/format/2010.03104">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Instance-Dependent Complexity of Contextual Bandits and Reinforcement Learning: A Disagreement-Based Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Foster%2C+D+J">Dylan J. Foster</a>, <a href="/search/cs?searchtype=author&query=Rakhlin%2C+A">Alexander Rakhlin</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.03104v1-abstract-short" style="display: inline;"> In the classical multi-armed bandit problem, instance-dependent algorithms attain improved performance on "easy" problems with a gap between the best and second-best arm. Are similar guarantees possible for contextual bandits? While positive results are known for certain special cases, there is no general theory characterizing when and how instance-dependent regret bounds for contextual bandits ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03104v1-abstract-full').style.display = 'inline'; document.getElementById('2010.03104v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.03104v1-abstract-full" style="display: none;"> In the classical multi-armed bandit problem, instance-dependent algorithms attain improved performance on "easy" problems with a gap between the best and second-best arm. Are similar guarantees possible for contextual bandits? While positive results are known for certain special cases, there is no general theory characterizing when and how instance-dependent regret bounds for contextual bandits can be achieved for rich, general classes of policies. We introduce a family of complexity measures that are both sufficient and necessary to obtain instance-dependent regret bounds. We then introduce new oracle-efficient algorithms which adapt to the gap whenever possible, while also attaining the minimax rate in the worst case. Finally, we provide structural results that tie together a number of complexity measures previously proposed throughout contextual bandits, reinforcement learning, and active learning and elucidate their role in determining the optimal instance-dependent regret. In a large-scale empirical evaluation, we find that our approach often gives superior results for challenging exploration problems. Turning our focus to reinforcement learning with function approximation, we develop new oracle-efficient algorithms for reinforcement learning with rich observations that obtain optimal gap-dependent sample complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03104v1-abstract-full').style.display = 'none'; document.getElementById('2010.03104v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.12920">arXiv:2009.12920</a> <span> [<a href="https://arxiv.org/pdf/2009.12920">pdf</a>, <a href="https://arxiv.org/format/2009.12920">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Dynamic Personalized Pricing with Demand Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yining Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.12920v2-abstract-short" style="display: inline;"> The prevalence of e-commerce has made detailed customers' personal information readily accessible to retailers, and this information has been widely used in pricing decisions. When involving personalized information, how to protect the privacy of such information becomes a critical issue in practice. In this paper, we consider a dynamic pricing problem over $T$ time periods with an \emph{unknown}… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.12920v2-abstract-full').style.display = 'inline'; document.getElementById('2009.12920v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.12920v2-abstract-full" style="display: none;"> The prevalence of e-commerce has made detailed customers' personal information readily accessible to retailers, and this information has been widely used in pricing decisions. When involving personalized information, how to protect the privacy of such information becomes a critical issue in practice. In this paper, we consider a dynamic pricing problem over $T$ time periods with an \emph{unknown} demand function of posted price and personalized information. At each time $t$, the retailer observes an arriving customer's personal information and offers a price. The customer then makes the purchase decision, which will be utilized by the retailer to learn the underlying demand function. There is potentially a serious privacy concern during this process: a third party agent might infer the personalized information and purchase decisions from price changes from the pricing system. Using the fundamental framework of differential privacy from computer science, we develop a privacy-preserving dynamic pricing policy, which tries to maximize the retailer revenue while avoiding information leakage of individual customer's information and purchasing decisions. To this end, we first introduce a notion of \emph{anticipating} $(\varepsilon, 未)$-differential privacy that is tailored to dynamic pricing problem. Our policy achieves both the privacy guarantee and the performance guarantee in terms of regret. Roughly speaking, for $d$-dimensional personalized information, our algorithm achieves the expected regret at the order of $\tilde{O}(\varepsilon^{-1} \sqrt{d^3 T})$, when the customers' information is adversarially chosen. For stochastic personalized information, the regret bound can be further improved to $\tilde{O}(\sqrt{d^2T} + \varepsilon^{-2} d^2)$ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.12920v2-abstract-full').style.display = 'none'; document.getElementById('2009.12920v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Final version. Accepted to Management Science</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.00080">arXiv:2007.00080</a> <span> [<a href="https://arxiv.org/pdf/2007.00080">pdf</a>, <a href="https://arxiv.org/ps/2007.00080">ps</a>, <a href="https://arxiv.org/format/2007.00080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Provably More Efficient Q-Learning in the One-Sided-Feedback/Full-Feedback Settings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gong%2C+X">Xiao-Yue Gong</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.00080v2-abstract-short" style="display: inline;"> Motivated by the episodic version of the classical inventory control problem, we propose a new Q-learning-based algorithm, Elimination-Based Half-Q-Learning (HQL), that enjoys improved efficiency over existing algorithms for a wide variety of problems in the one-sided-feedback setting. We also provide a simpler variant of the algorithm, Full-Q-Learning (FQL), for the full-feedback setting. We esta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.00080v2-abstract-full').style.display = 'inline'; document.getElementById('2007.00080v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.00080v2-abstract-full" style="display: none;"> Motivated by the episodic version of the classical inventory control problem, we propose a new Q-learning-based algorithm, Elimination-Based Half-Q-Learning (HQL), that enjoys improved efficiency over existing algorithms for a wide variety of problems in the one-sided-feedback setting. We also provide a simpler variant of the algorithm, Full-Q-Learning (FQL), for the full-feedback setting. We establish that HQL incurs $ \tilde{\mathcal{O}}(H^3\sqrt{ T})$ regret and FQL incurs $\tilde{\mathcal{O}}(H^2\sqrt{ T})$ regret, where $H$ is the length of each episode and $T$ is the total length of the horizon. The regret bounds are not affected by the possibly huge state and action space. Our numerical experiments demonstrate the superior efficiency of HQL and FQL, and the potential to combine reinforcement learning with richer feedback models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.00080v2-abstract-full').style.display = 'none'; document.getElementById('2007.00080v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.14389">arXiv:2006.14389</a> <span> [<a href="https://arxiv.org/pdf/2006.14389">pdf</a>, <a href="https://arxiv.org/format/2006.14389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning for Non-Stationary Markov Decision Processes: The Blessing of (More) Optimism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.14389v1-abstract-short" style="display: inline;"> We consider un-discounted reinforcement learning (RL) in Markov decision processes (MDPs) under drifting non-stationarity, i.e., both the reward and state transition distributions are allowed to evolve over time, as long as their respective total variations, quantified by suitable metrics, do not exceed certain variation budgets. We first develop the Sliding Window Upper-Confidence bound for Reinf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.14389v1-abstract-full').style.display = 'inline'; document.getElementById('2006.14389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.14389v1-abstract-full" style="display: none;"> We consider un-discounted reinforcement learning (RL) in Markov decision processes (MDPs) under drifting non-stationarity, i.e., both the reward and state transition distributions are allowed to evolve over time, as long as their respective total variations, quantified by suitable metrics, do not exceed certain variation budgets. We first develop the Sliding Window Upper-Confidence bound for Reinforcement Learning with Confidence Widening (SWUCRL2-CW) algorithm, and establish its dynamic regret bound when the variation budgets are known. In addition, we propose the Bandit-over-Reinforcement Learning (BORL) algorithm to adaptively tune the SWUCRL2-CW algorithm to achieve the same dynamic regret bound, but in a parameter-free manner, i.e., without knowing the variation budgets. Notably, learning non-stationary MDPs via the conventional optimistic exploration technique presents a unique challenge absent in existing (non-stationary) bandit learning settings. We overcome the challenge by a novel confidence widening technique that incorporates additional optimism. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.14389v1-abstract-full').style.display = 'none'; document.getElementById('2006.14389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in proceedings of the 37th International Conference on Machine Learning. Shortened conference version of its journal version (available at: arXiv:1906.02922)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.00947">arXiv:2005.00947</a> <span> [<a href="https://arxiv.org/pdf/2005.00947">pdf</a>, <a href="https://arxiv.org/format/2005.00947">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Online Learning and Optimization for Revenue Management Problems with Add-on Discounts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+R">Rui Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huanan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.00947v1-abstract-short" style="display: inline;"> We study in this paper a revenue management problem with add-on discounts. The problem is motivated by the practice in the video game industry, where a retailer offers discounts on selected supportive products (e.g. video games) to customers who have also purchased the core products (e.g. video game consoles). We formulate this problem as an optimization problem to determine the prices of differen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00947v1-abstract-full').style.display = 'inline'; document.getElementById('2005.00947v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.00947v1-abstract-full" style="display: none;"> We study in this paper a revenue management problem with add-on discounts. The problem is motivated by the practice in the video game industry, where a retailer offers discounts on selected supportive products (e.g. video games) to customers who have also purchased the core products (e.g. video game consoles). We formulate this problem as an optimization problem to determine the prices of different products and the selection of products with add-on discounts. To overcome the computational challenge of this optimization problem, we propose an efficient FPTAS algorithm that can solve the problem approximately to any desired accuracy. Moreover, we consider the revenue management problem in the setting where the retailer has no prior knowledge of the demand functions of different products. To resolve this problem, we propose a UCB-based learning algorithm that uses the FPTAS optimization algorithm as a subroutine. We show that our learning algorithm can converge to the optimal algorithm that has access to the true demand functions, and we prove that the convergence rate is tight up to a certain logarithmic term. In addition, we conduct numerical experiments with the real-world transaction data we collect from a popular video gaming brand's online store on Tmall.com. The experiment results illustrate our learning algorithm's robust performance and fast convergence in various scenarios. We also compare our algorithm with the optimal policy that does not use any add-on discount, and the results show the advantages of using the add-on discount strategy in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00947v1-abstract-full').style.display = 'none'; document.getElementById('2005.00947v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.12699">arXiv:2003.12699</a> <span> [<a href="https://arxiv.org/pdf/2003.12699">pdf</a>, <a href="https://arxiv.org/format/2003.12699">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Bypassing the Monster: A Faster and Simpler Optimal Algorithm for Contextual Bandits under Realizability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.12699v5-abstract-short" style="display: inline;"> We consider the general (stochastic) contextual bandit problem under the realizability assumption, i.e., the expected reward, as a function of contexts and actions, belongs to a general function class $\mathcal{F}$. We design a fast and simple algorithm that achieves the statistically optimal regret with only ${O}(\log T)$ calls to an offline regression oracle across all $T$ rounds. The number of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.12699v5-abstract-full').style.display = 'inline'; document.getElementById('2003.12699v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.12699v5-abstract-full" style="display: none;"> We consider the general (stochastic) contextual bandit problem under the realizability assumption, i.e., the expected reward, as a function of contexts and actions, belongs to a general function class $\mathcal{F}$. We design a fast and simple algorithm that achieves the statistically optimal regret with only ${O}(\log T)$ calls to an offline regression oracle across all $T$ rounds. The number of oracle calls can be further reduced to $O(\log\log T)$ if $T$ is known in advance. Our results provide the first universal and optimal reduction from contextual bandits to offline regression, solving an important open problem in the contextual bandit literature. A direct consequence of our results is that any advances in offline regression immediately translate to contextual bandits, statistically and computationally. This leads to faster algorithms and improved regret guarantees for broader classes of contextual bandit problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.12699v5-abstract-full').style.display = 'none'; document.getElementById('2003.12699v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Forthcoming in Mathematics of Operations Research</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.01067">arXiv:1911.01067</a> <span> [<a href="https://arxiv.org/pdf/1911.01067">pdf</a>, <a href="https://arxiv.org/format/1911.01067">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Blind Network Revenue Management and Bandits with Knapsacks under Limited Switches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jinglong Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.01067v4-abstract-short" style="display: inline;"> Our work is motivated by a common business constraint in online markets. While firms respect the advantages of dynamic pricing and price experimentation, they must limit the number of price changes (i.e., switches) to be within some budget due to various practical reasons. We study both the classical price-based network revenue management problem in the distributionally-unknown setup, and the band… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.01067v4-abstract-full').style.display = 'inline'; document.getElementById('1911.01067v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.01067v4-abstract-full" style="display: none;"> Our work is motivated by a common business constraint in online markets. While firms respect the advantages of dynamic pricing and price experimentation, they must limit the number of price changes (i.e., switches) to be within some budget due to various practical reasons. We study both the classical price-based network revenue management problem in the distributionally-unknown setup, and the bandits with knapsacks problem. In these problems, a decision-maker (without prior knowledge of the environment) has finite initial inventory of multiple resources to allocate over a finite time horizon. Beyond the classical resource constraints, we introduce an additional switching constraint to these problems, which restricts the total number of times that the decision-maker makes switches between actions to be within a fixed switching budget. For such problems, we show matching upper and lower bounds on the optimal regret, and propose computationally-efficient limited-switch algorithms that achieve the optimal regret. Our work reveals a surprising result: the optimal regret rate is completely characterized by a piecewise-constant function of the switching budget, which further depends on the number of resource constraints -- to the best of our knowledge, this is the first time the number of resources constraints is shown to play a fundamental role in determining the statistical complexity of online learning problems. We conduct computational experiments to examine the performance of our algorithms on a numerical setup that is widely used in the literature. Compared with benchmark algorithms from the literature, our proposed algorithms achieve promising performance with clear advantages on the number of incurred switches. Practically, firms can benefit from our study and improve their learning and decision-making performance when they simultaneously face resource and switching constraints. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.01067v4-abstract-full').style.display = 'none'; document.getElementById('1911.01067v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.08693">arXiv:1910.08693</a> <span> [<a href="https://arxiv.org/pdf/1910.08693">pdf</a>, <a href="https://arxiv.org/format/1910.08693">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Online Pricing with Offline Data: Phase Transition and Inverse Square Law </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bu%2C+J">Jinzhi Bu</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.08693v7-abstract-short" style="display: inline;"> This paper investigates the impact of pre-existing offline data on online learning, in the context of dynamic pricing. We study a single-product dynamic pricing problem over a selling horizon of $T$ periods. The demand in each period is determined by the price of the product according to a linear demand model with unknown parameters. We assume that before the start of the selling horizon, the sell… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.08693v7-abstract-full').style.display = 'inline'; document.getElementById('1910.08693v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.08693v7-abstract-full" style="display: none;"> This paper investigates the impact of pre-existing offline data on online learning, in the context of dynamic pricing. We study a single-product dynamic pricing problem over a selling horizon of $T$ periods. The demand in each period is determined by the price of the product according to a linear demand model with unknown parameters. We assume that before the start of the selling horizon, the seller already has some pre-existing offline data. The offline data set contains $n$ samples, each of which is an input-output pair consisting of a historical price and an associated demand observation. The seller wants to utilize both the pre-existing offline data and the sequential online data to minimize the regret of the online learning process. We characterize the joint effect of the size, location and dispersion of the offline data on the optimal regret of the online learning process. Specifically, the size, location and dispersion of the offline data are measured by the number of historical samples $n$, the distance between the average historical price and the optimal price $未$, and the standard deviation of the historical prices $蟽$, respectively. We show that the optimal regret is $\widetilde 螛\left(\sqrt{T}\wedge \frac{T}{(n\wedge T)未^2+n蟽^2}\right)$, and design a learning algorithm based on the "optimism in the face of uncertainty" principle, whose regret is optimal up to a logarithmic factor. Our results reveal surprising transformations of the optimal regret rate with respect to the size of the offline data, which we refer to as phase transitions. In addition, our results demonstrate that the location and dispersion of the offline data also have an intrinsic effect on the optimal regret, and we quantify this effect via the inverse-square law. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.08693v7-abstract-full').style.display = 'none'; document.getElementById('1910.08693v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Forthcoming in Management Science</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.09808">arXiv:1908.09808</a> <span> [<a href="https://arxiv.org/pdf/1908.09808">pdf</a>, <a href="https://arxiv.org/format/1908.09808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Multi-stage and Multi-customer Assortment Optimization with Inventory Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fata%2C+E">Elaheh Fata</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.09808v2-abstract-short" style="display: inline;"> We consider an assortment optimization problem where a customer chooses a single item from a sequence of sets shown to her, while limited inventories constrain the items offered to customers over time. In the special case where all of the assortments have size one, our problem captures the online stochastic matching with timeouts problem. For this problem, we derive a polynomial-time approximation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.09808v2-abstract-full').style.display = 'inline'; document.getElementById('1908.09808v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.09808v2-abstract-full" style="display: none;"> We consider an assortment optimization problem where a customer chooses a single item from a sequence of sets shown to her, while limited inventories constrain the items offered to customers over time. In the special case where all of the assortments have size one, our problem captures the online stochastic matching with timeouts problem. For this problem, we derive a polynomial-time approximation algorithm which earns at least 1-ln(2-1/e), or 0.51, of the optimum. This improves upon the previous-best approximation ratio of 0.46, and furthermore, we show that it is tight. For the general assortment problem, we establish the first constant-factor approximation ratio of 0.09 for the case that different types of customers value items differently, and an approximation ratio of 0.15 for the case that different customers value each item the same. Our algorithms are based on rounding an LP relaxation for multi-stage assortment optimization, and improve upon previous randomized rounding schemes to derive the tight ratio of 1-ln(2-1/e). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.09808v2-abstract-full').style.display = 'none'; document.getElementById('1908.09808v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1907.08735">arXiv:1907.08735</a> <span> [<a href="https://arxiv.org/pdf/1907.08735">pdf</a>, <a href="https://arxiv.org/format/1907.08735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> The Competitive Ratio of Threshold Policies for Online Unit-density Knapsack Problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jinglong Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1907.08735v3-abstract-short" style="display: inline;"> We study a supply chain ordering problem faced by a wholesale supplier serving unpredictable demand. In this problem, the supplier has an initial stock, and faces a stream of orders for different amounts that are unknown a priori. Each order must be either accepted or rejected immediately, and must respect the knapsack constraint, that is, an order is only acceptable if its amount can be fully ser… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.08735v3-abstract-full').style.display = 'inline'; document.getElementById('1907.08735v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1907.08735v3-abstract-full" style="display: none;"> We study a supply chain ordering problem faced by a wholesale supplier serving unpredictable demand. In this problem, the supplier has an initial stock, and faces a stream of orders for different amounts that are unknown a priori. Each order must be either accepted or rejected immediately, and must respect the knapsack constraint, that is, an order is only acceptable if its amount can be fully served by the remaining stock. The objective is to maximize the total stock spent serving orders. We investigate randomized threshold algorithms that accept an item as long as its size exceeds the threshold. We derive two optimal threshold distributions, the first is 0.4324-competitive relative to the optimal offline integral packing, and the second is 0.4285-competitive relative to the optimal offline fractional packing. Both results require optimizing the cumulative distribution function of the random threshold, which are challenging infinite-dimensional optimization problems. We also consider the generalization to multiple knapsacks, where an arriving item has a different size in each knapsack and must be placed in at most one knapsack. We derive a 0.2142-competitive algorithm for this problem. We also show that any randomized algorithm for this problem cannot be more than 0.4605-competitive. This is the first upper bound strictly less than 0.5, which implies the intrinsic challenge of knapsack constraint. We show how to naturally implement our optimal threshold distributions in the warehouses of a Latin American chain department store. We run simulations on their order data, which demonstrate the efficacy of our proposed algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.08735v3-abstract-full').style.display = 'none'; document.getElementById('1907.08735v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 July, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.02922">arXiv:1906.02922</a> <span> [<a href="https://arxiv.org/pdf/1906.02922">pdf</a>, <a href="https://arxiv.org/format/1906.02922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Non-Stationary Reinforcement Learning: The Blessing of (More) Optimism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.02922v4-abstract-short" style="display: inline;"> We consider un-discounted reinforcement learning (RL) in Markov decision processes (MDPs) under temporal drifts, ie, both the reward and state transition distributions are allowed to evolve over time, as long as their respective total variations, quantified by suitable metrics, do not exceed certain variation budgets. This setting captures the endogeneity, exogeneity, uncertainty, and partial feed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02922v4-abstract-full').style.display = 'inline'; document.getElementById('1906.02922v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.02922v4-abstract-full" style="display: none;"> We consider un-discounted reinforcement learning (RL) in Markov decision processes (MDPs) under temporal drifts, ie, both the reward and state transition distributions are allowed to evolve over time, as long as their respective total variations, quantified by suitable metrics, do not exceed certain variation budgets. This setting captures the endogeneity, exogeneity, uncertainty, and partial feedback in sequential decision-making scenarios, and finds applications in vehicle remarketing and real-time bidding. We first develop the Sliding Window Upper-Confidence bound for Reinforcement Learning with Confidence Widening (SWUCRL2-CW) algorithm, and establish its dynamic regret bound when the variation budgets are known. In addition, we propose the Bandit-over-Reinforcement Learning (BORL) algorithm to adaptively tune the SWUCRL2-CW algorithm to achieve the same dynamic regret bound, but in a parameter-free manner, ie, without knowing the variation budgets. Finally, we conduct numerical experiments to show that our proposed algorithms achieve superior empirical performance compared to existing algorithms. Notably, the interplay between endogeneity and exogeneity presents a unique challenge, absent in existing (stationary and non-stationary) stochastic online learning settings, when we apply the conventional Optimism in Face of Uncertainty principle to design algorithms with provably low dynamic regret for RL in drifting MDPs. We overcome the challenge by a novel confidence widening technique that incorporates additional optimism into our learning algorithms to ensure low dynamic regret bounds. To extend our theoretical findings, we apply our framework to inventory control problems, and demonstrate how one can alternatively leverage special structures on the state transition distributions to bypass the difficulty in exploring time-varying environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02922v4-abstract-full').style.display = 'none'; document.getElementById('1906.02922v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.10825">arXiv:1905.10825</a> <span> [<a href="https://arxiv.org/pdf/1905.10825">pdf</a>, <a href="https://arxiv.org/ps/1905.10825">ps</a>, <a href="https://arxiv.org/format/1905.10825">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Phase Transitions in Bandits with Switching Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunzong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.10825v4-abstract-short" style="display: inline;"> We consider the classical stochastic multi-armed bandit problem with a constraint that limits the total cost incurred by switching between actions to be no larger than a given switching budget. For this problem, we prove matching upper and lower bounds on the optimal (i.e., minimax) regret, and provide efficient rate-optimal algorithms. Surprisingly, the optimal regret of this problem exhibits a n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10825v4-abstract-full').style.display = 'inline'; document.getElementById('1905.10825v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.10825v4-abstract-full" style="display: none;"> We consider the classical stochastic multi-armed bandit problem with a constraint that limits the total cost incurred by switching between actions to be no larger than a given switching budget. For this problem, we prove matching upper and lower bounds on the optimal (i.e., minimax) regret, and provide efficient rate-optimal algorithms. Surprisingly, the optimal regret of this problem exhibits a non-conventional growth rate in terms of the time horizon and the number of arms. Consequently, we discover surprising "phase transitions" regarding how the optimal regret rate changes with respect to the switching budget: when the number of arms is fixed, there are equal-length phases, where the optimal regret rate remains (almost) the same within each phase and exhibits abrupt changes between phases; when the number of arms grows with the time horizon, such abrupt changes become subtler and may disappear, but a generalized notion of phase transitions involving certain new measurements still exists. The results enable us to fully characterize the trade-off between the regret rate and the incurred switching cost in the stochastic multi-armed bandit problem, contributing new insights to this fundamental problem. Under the general switching cost structure, the results reveal interesting connections between bandit problems and graph traversal problems, such as the shortest Hamiltonian path problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10825v4-abstract-full').style.display = 'none'; document.getElementById('1905.10825v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">An enhanced version. Many new results are obtained. The presentation is improved</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.04770">arXiv:1905.04770</a> <span> [<a href="https://arxiv.org/pdf/1905.04770">pdf</a>, <a href="https://arxiv.org/format/1905.04770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Algorithms for Online Matching, Assortment, and Pricing with Tight Weight-dependent Competitive Ratios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.04770v1-abstract-short" style="display: inline;"> Motivated by the dynamic assortment offerings and item pricings occurring in e-commerce, we study a general problem of allocating finite inventories to heterogeneous customers arriving sequentially. We analyze this problem under the framework of competitive analysis, where the sequence of customers is unknown and does not necessarily follow any pattern. Previous work in this area, studying online… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.04770v1-abstract-full').style.display = 'inline'; document.getElementById('1905.04770v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.04770v1-abstract-full" style="display: none;"> Motivated by the dynamic assortment offerings and item pricings occurring in e-commerce, we study a general problem of allocating finite inventories to heterogeneous customers arriving sequentially. We analyze this problem under the framework of competitive analysis, where the sequence of customers is unknown and does not necessarily follow any pattern. Previous work in this area, studying online matching, advertising, and assortment problems, has focused on the case where each item can only be sold at a single price, resulting in algorithms which achieve the best-possible competitive ratio of 1-1/e. In this paper, we extend all of these results to allow for items having multiple feasible prices. Our algorithms achieve the best-possible weight-dependent competitive ratios, which depend on the sets of feasible prices given in advance. Our algorithms are also simple and intuitive; they are based on constructing a class of universal ``value functions'' which integrate the selection of items and prices offered. Finally, we test our algorithms on the publicly-available hotel data set of Bodea et al. (2009), where there are multiple items (hotel rooms) each with multiple prices (fares at which the room could be sold). We find that applying our algorithms, as a ``hybrid'' with algorithms which attempt to forecast and learn the future transactions, results in the best performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.04770v1-abstract-full').style.display = 'none'; document.getElementById('1905.04770v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.07844">arXiv:1903.07844</a> <span> [<a href="https://arxiv.org/pdf/1903.07844">pdf</a>, <a href="https://arxiv.org/format/1903.07844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Shrinking the Upper Confidence Bound: A Dynamic Product Selection Problem for Urban Warehouses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+R">Rong Jin</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Li Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinshang Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sen Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.07844v2-abstract-short" style="display: inline;"> The recent rising popularity of ultra-fast delivery services on retail platforms fuels the increasing use of urban warehouses, whose proximity to customers makes fast deliveries viable. The space limit in urban warehouses poses a problem for the online retailers: the number of products (SKUs) they carry is no longer "the more, the better", yet it can still be significantly large, reaching hundreds… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.07844v2-abstract-full').style.display = 'inline'; document.getElementById('1903.07844v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.07844v2-abstract-full" style="display: none;"> The recent rising popularity of ultra-fast delivery services on retail platforms fuels the increasing use of urban warehouses, whose proximity to customers makes fast deliveries viable. The space limit in urban warehouses poses a problem for the online retailers: the number of products (SKUs) they carry is no longer "the more, the better", yet it can still be significantly large, reaching hundreds or thousands in a product category. In this paper, we study algorithms for dynamically identifying a large number of products (i.e., SKUs) with top customer purchase probabilities on the fly, from an ocean of potential products to offer on retailers' ultra-fast delivery platforms. We distill the product selection problem into a semi-bandit model with linear generalization. There are in total $N$ different arms, each with a feature vector of dimension $d$. The player pulls $K$ arms in each period and observes the bandit feedback from each of the pulled arms. We focus on the setting where $K$ is much greater than the number of total time periods $T$ or the dimension of product features $d$. We first analyze a standard UCB algorithm and show its regret bound can be expressed as the sum of a $T$-independent part $\tilde O(K d^{3/2})$ and a $T$-dependent part $\tilde O(d\sqrt{KT})$, which we refer to as "fixed cost" and "variable cost" respectively. To reduce the fixed cost for large $K$ values, we propose a novel online learning algorithm, which iteratively shrinks the upper confidence bounds within each period, and show its fixed cost is reduced by a factor of $d$ to $\tilde O(K \sqrt{d})$. Moreover, we test the algorithms on an industrial dataset from Alibaba Group. Experimental results show that our new algorithm reduces the total regret of the standard UCB algorithm by at least 10%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.07844v2-abstract-full').style.display = 'none'; document.getElementById('1903.07844v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.01461">arXiv:1903.01461</a> <span> [<a href="https://arxiv.org/pdf/1903.01461">pdf</a>, <a href="https://arxiv.org/format/1903.01461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hedging the Drift: Learning to Optimize under Non-Stationarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.01461v4-abstract-short" style="display: inline;"> We introduce data-driven decision-making algorithms that achieve state-of-the-art \emph{dynamic regret} bounds for non-stationary bandit settings. These settings capture applications such as advertisement allocation, dynamic pricing, and traffic network routing in changing environments. We show how the difficulty posed by the (unknown \emph{a priori} and possibly adversarial) non-stationarity can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.01461v4-abstract-full').style.display = 'inline'; document.getElementById('1903.01461v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.01461v4-abstract-full" style="display: none;"> We introduce data-driven decision-making algorithms that achieve state-of-the-art \emph{dynamic regret} bounds for non-stationary bandit settings. These settings capture applications such as advertisement allocation, dynamic pricing, and traffic network routing in changing environments. We show how the difficulty posed by the (unknown \emph{a priori} and possibly adversarial) non-stationarity can be overcome by an unconventional marriage between stochastic and adversarial bandit learning algorithms. Our main contribution is a general algorithmic recipe for a wide variety of non-stationary bandit problems. Specifically, we design and analyze the sliding window-upper confidence bound algorithm that achieves the optimal dynamic regret bound for each of the settings when we know the respective underlying \emph{variation budget}, which quantifies the total amount of temporal variation of the latent environments. Boosted by the novel bandit-over-bandit framework that adapts to the latent changes, we can further enjoy the (nearly) optimal dynamic regret bounds in a (surprisingly) parameter-free manner. In addition to the classical exploration-exploitation trade-off, our algorithms leverage the power of the "forgetting principle" in the learning processes, which is vital in changing environments. Our extensive numerical experiments on both synthetic and real world online auto-loan datasets show that our proposed algorithms achieve superior empirical performance compared to existing algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.01461v4-abstract-full').style.display = 'none'; document.getElementById('1903.01461v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Journal version of the AISTATS 2019 version (available at arXiv:1810.03024). This version fixed an error in the proof of Theorem 2 with Assumption 4 of arXiv:2103.05750</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1902.10918">arXiv:1902.10918</a> <span> [<a href="https://arxiv.org/pdf/1902.10918">pdf</a>, <a href="https://arxiv.org/format/1902.10918">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Meta Dynamic Pricing: Transfer Learning Across Experiments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bastani%2C+H">Hamsa Bastani</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1902.10918v4-abstract-short" style="display: inline;"> We study the problem of learning shared structure \emph{across} a sequence of dynamic pricing experiments for related products. We consider a practical formulation where the unknown demand parameters for each product come from an unknown distribution (prior) that is shared across products. We then propose a meta dynamic pricing algorithm that learns this prior online while solving a sequence of Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.10918v4-abstract-full').style.display = 'inline'; document.getElementById('1902.10918v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1902.10918v4-abstract-full" style="display: none;"> We study the problem of learning shared structure \emph{across} a sequence of dynamic pricing experiments for related products. We consider a practical formulation where the unknown demand parameters for each product come from an unknown distribution (prior) that is shared across products. We then propose a meta dynamic pricing algorithm that learns this prior online while solving a sequence of Thompson sampling pricing experiments (each with horizon $T$) for $N$ different products. Our algorithm addresses two challenges: (i) balancing the need to learn the prior (\emph{meta-exploration}) with the need to leverage the estimated prior to achieve good performance (\emph{meta-exploitation}), and (ii) accounting for uncertainty in the estimated prior by appropriately "widening" the estimated prior as a function of its estimation error. We introduce a novel prior alignment technique to analyze the regret of Thompson sampling with a mis-specified prior, which may be of independent interest. Unlike prior-independent approaches, our algorithm's meta regret grows sublinearly in $N$, demonstrating that the price of an unknown prior in Thompson sampling can be negligible in experiment-rich environments (large $N$). Numerical experiments on synthetic and real auto loan data demonstrate that our algorithm significantly speeds up learning compared to prior-independent algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.10918v4-abstract-full').style.display = 'none'; document.getElementById('1902.10918v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1901.02871">arXiv:1901.02871</a> <span> [<a href="https://arxiv.org/pdf/1901.02871">pdf</a>, <a href="https://arxiv.org/format/1901.02871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> The Lingering of Gradients: Theory and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Allen-Zhu%2C+Z">Zeyuan Allen-Zhu</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinshang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1901.02871v2-abstract-short" style="display: inline;"> Classically, the time complexity of a first-order method is estimated by its number of gradient computations. In this paper, we study a more refined complexity by taking into account the `lingering' of gradients: once a gradient is computed at $x_k$, the additional time to compute gradients at $x_{k+1},x_{k+2},\dots$ may be reduced. We show how this improves the running time of several first-ord… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.02871v2-abstract-full').style.display = 'inline'; document.getElementById('1901.02871v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1901.02871v2-abstract-full" style="display: none;"> Classically, the time complexity of a first-order method is estimated by its number of gradient computations. In this paper, we study a more refined complexity by taking into account the `lingering' of gradients: once a gradient is computed at $x_k$, the additional time to compute gradients at $x_{k+1},x_{k+2},\dots$ may be reduced. We show how this improves the running time of several first-order methods. For instance, if the `additional time' scales linearly with respect to the traveled distance, then the `convergence rate' of gradient descent can be improved from $1/T$ to $\exp(-T^{1/3})$. On the application side, we solve a hypothetical revenue management problem on the Yahoo! Front Page Today Module with 4.6m users to $10^{-6}$ error using only 6 passes of the dataset; and solve a real-life support vector machine problem to an accuracy that is two orders of magnitude better comparing to the state-of-the-art algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.02871v2-abstract-full').style.display = 'none'; document.getElementById('1901.02871v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.01077">arXiv:1811.01077</a> <span> [<a href="https://arxiv.org/pdf/1811.01077">pdf</a>, <a href="https://arxiv.org/format/1811.01077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Pricing (and Assortment) under a Static Calendar </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jinglong Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.01077v5-abstract-short" style="display: inline;"> This work is motivated by our collaboration with a large consumer packaged goods (CPG) company. We have found that while the company appreciates the advantages of dynamic pricing, they deem it operationally much easier to plan out a static price calendar in advance. We investigate the efficacy of static control policies for revenue management problems whose optimal solution is inherently dynamic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.01077v5-abstract-full').style.display = 'inline'; document.getElementById('1811.01077v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.01077v5-abstract-full" style="display: none;"> This work is motivated by our collaboration with a large consumer packaged goods (CPG) company. We have found that while the company appreciates the advantages of dynamic pricing, they deem it operationally much easier to plan out a static price calendar in advance. We investigate the efficacy of static control policies for revenue management problems whose optimal solution is inherently dynamic. In these problems, a firm has limited inventory to sell over a finite time horizon, over which heterogeneous customers stochastically arrive. We consider both pricing and assortment controls, and derive simple static policies in the form of a price calendar or a planned sequence of assortments, respectively. In the assortment planning problem, we also differentiate between the static vs. dynamic substitution models of customer demand. We show that our policies are within 1-1/e (approximately 0.63) of the optimum under stationary (IID) demand, and 1/2 of the optimum under non-stationary demand, with both guarantees approaching 1 if the starting inventories are large. We adapt the technique of prophet inequalities from optimal stopping theory to pricing and assortment problems, and our results are relative to the linear programming relaxation. Under the special case of IID single-item pricing, our results improve the understanding of irregular and discrete demand curves, by showing that a static calendar can be (1-1/e)-approximate if the prices are sorted high-to-low. Finally, we demonstrate on both data from the CPG company and synthetic data from the literature that our simple price and assortment calendars are effective. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.01077v5-abstract-full').style.display = 'none'; document.getElementById('1811.01077v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.10900">arXiv:1810.10900</a> <span> [<a href="https://arxiv.org/pdf/1810.10900">pdf</a>, <a href="https://arxiv.org/format/1810.10900">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> On Policies for Single-leg Revenue Management with Limited Demand Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Teo%2C+C">Chung-Piaw Teo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.10900v2-abstract-short" style="display: inline;"> In this paper we study the single-item revenue management problem, with no information given about the demand trajectory over time. When the item is sold through accepting/rejecting different fare classes, Ball and Queyranne (2009) have established the tight competitive ratio for this problem using booking limit policies, which raise the acceptance threshold as the remaining inventory dwindles. Ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.10900v2-abstract-full').style.display = 'inline'; document.getElementById('1810.10900v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.10900v2-abstract-full" style="display: none;"> In this paper we study the single-item revenue management problem, with no information given about the demand trajectory over time. When the item is sold through accepting/rejecting different fare classes, Ball and Queyranne (2009) have established the tight competitive ratio for this problem using booking limit policies, which raise the acceptance threshold as the remaining inventory dwindles. However, when the item is sold through dynamic pricing instead, there is the additional challenge that offering a low price may entice high-paying customers to substitute down. We show that despite this challenge, the same competitive ratio can still be achieved using a randomized dynamic pricing policy. Our policy incorporates the price-skimming technique from Eren and Maglaras (2010), but importantly we show how the randomized price distribution should be stochastically-increased as the remaining inventory dwindles. A key technical ingredient in our policy is a new "valuation tracking" subroutine, which tracks the possible values for the optimum, and follows the most "inventory-conservative" control which maintains the desired competitive ratio. Finally, we demonstrate the empirical effectiveness of our policy in simulations, where its average-case performance surpasses all naive modifications of the existing policies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.10900v2-abstract-full').style.display = 'none'; document.getElementById('1810.10900v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.05640">arXiv:1810.05640</a> <span> [<a href="https://arxiv.org/pdf/1810.05640">pdf</a>, <a href="https://arxiv.org/format/1810.05640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Inventory Balancing with Online Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinshang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.05640v2-abstract-short" style="display: inline;"> We study a general problem of allocating limited resources to heterogeneous customers over time under model uncertainty. Each type of customer can be serviced using different actions, each of which stochastically consumes some combination of resources, and returns different rewards for the resources consumed. We consider a general model where the resource consumption distribution associated with e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.05640v2-abstract-full').style.display = 'inline'; document.getElementById('1810.05640v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.05640v2-abstract-full" style="display: none;"> We study a general problem of allocating limited resources to heterogeneous customers over time under model uncertainty. Each type of customer can be serviced using different actions, each of which stochastically consumes some combination of resources, and returns different rewards for the resources consumed. We consider a general model where the resource consumption distribution associated with each (customer type, action)-combination is not known, but is consistent and can be learned over time. In addition, the sequence of customer types to arrive over time is arbitrary and completely unknown. We overcome both the challenges of model uncertainty and customer heterogeneity by judiciously synthesizing two algorithmic frameworks from the literature: inventory balancing, which "reserves" a portion of each resource for high-reward customer types which could later arrive, and online learning, which shows how to "explore" the resource consumption distributions of each customer type under different actions. We define an auxiliary problem, which allows for existing competitive ratio and regret bounds to be seamlessly integrated. Furthermore, we show that the performance guarantee generated by our framework is tight, that is, we provide an information-theoretic lower bound which shows that both the loss from competitive ratio and the loss for regret are relevant in the combined problem. Finally, we demonstrate the efficacy of our algorithms on a publicly available hotel data set. Our framework is highly practical in that it requires no historical data (no fitted customer choice models, nor forecasting of customer arrival patterns) and can be used to initialize allocation strategies in fast-changing environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.05640v2-abstract-full').style.display = 'none'; document.getElementById('1810.05640v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.03024">arXiv:1810.03024</a> <span> [<a href="https://arxiv.org/pdf/1810.03024">pdf</a>, <a href="https://arxiv.org/format/1810.03024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning to Optimize under Non-Stationarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Ruihao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.03024v6-abstract-short" style="display: inline;"> We introduce algorithms that achieve state-of-the-art \emph{dynamic regret} bounds for non-stationary linear stochastic bandit setting. It captures natural applications such as dynamic pricing and ads allocation in a changing environment. We show how the difficulty posed by the non-stationarity can be overcome by a novel marriage between stochastic and adversarial bandits learning algorithms. Defi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.03024v6-abstract-full').style.display = 'inline'; document.getElementById('1810.03024v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.03024v6-abstract-full" style="display: none;"> We introduce algorithms that achieve state-of-the-art \emph{dynamic regret} bounds for non-stationary linear stochastic bandit setting. It captures natural applications such as dynamic pricing and ads allocation in a changing environment. We show how the difficulty posed by the non-stationarity can be overcome by a novel marriage between stochastic and adversarial bandits learning algorithms. Defining $d,B_T,$ and $T$ as the problem dimension, the \emph{variation budget}, and the total time horizon, respectively, our main contributions are the tuned Sliding Window UCB (\texttt{SW-UCB}) algorithm with optimal $\widetilde{O}(d^{2/3}(B_T+1)^{1/3}T^{2/3})$ dynamic regret, and the tuning free bandit-over-bandit (\texttt{BOB}) framework built on top of the \texttt{SW-UCB} algorithm with best $\widetilde{O}(d^{2/3}(B_T+1)^{1/4}T^{3/4})$ dynamic regret. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.03024v6-abstract-full').style.display = 'none'; document.getElementById('1810.03024v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This version fixed an error in the proof of Lemma 1 with Assumption 4 of arXiv:2103.05750</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS 2019) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1709.03683">arXiv:1709.03683</a> <span> [<a href="https://arxiv.org/pdf/1709.03683">pdf</a>, <a href="https://arxiv.org/format/1709.03683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Practically Competitive and Provably Consistent Algorithm for Uplift Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yan Zhao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+X">Xiao Fang</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1709.03683v1-abstract-short" style="display: inline;"> Randomized experiments have been critical tools of decision making for decades. However, subjects can show significant heterogeneity in response to treatments in many important applications. Therefore it is not enough to simply know which treatment is optimal for the entire population. What we need is a model that correctly customize treatment assignment base on subject characteristics. The proble… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1709.03683v1-abstract-full').style.display = 'inline'; document.getElementById('1709.03683v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1709.03683v1-abstract-full" style="display: none;"> Randomized experiments have been critical tools of decision making for decades. However, subjects can show significant heterogeneity in response to treatments in many important applications. Therefore it is not enough to simply know which treatment is optimal for the entire population. What we need is a model that correctly customize treatment assignment base on subject characteristics. The problem of constructing such models from randomized experiments data is known as Uplift Modeling in the literature. Many algorithms have been proposed for uplift modeling and some have generated promising results on various data sets. Yet little is known about the theoretical properties of these algorithms. In this paper, we propose a new tree-based ensemble algorithm for uplift modeling. Experiments show that our algorithm can achieve competitive results on both synthetic and industry-provided data. In addition, by properly tuning the "node size" parameter, our algorithm is proved to be consistent under mild regularity conditions. This is the first consistent algorithm for uplift modeling that we are aware of. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1709.03683v1-abstract-full').style.display = 'none'; document.getElementById('1709.03683v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by 2017 IEEE International Conference on Data Mining</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1705.08492">arXiv:1705.08492</a> <span> [<a href="https://arxiv.org/pdf/1705.08492">pdf</a>, <a href="https://arxiv.org/format/1705.08492">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Uplift Modeling with Multiple Treatments and General Response Types </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yan Zhao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+X">Xiao Fang</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1705.08492v1-abstract-short" style="display: inline;"> Randomized experiments have been used to assist decision-making in many areas. They help people select the optimal treatment for the test population with certain statistical guarantee. However, subjects can show significant heterogeneity in response to treatments. The problem of customizing treatment assignment based on subject characteristics is known as uplift modeling, differential response ana… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.08492v1-abstract-full').style.display = 'inline'; document.getElementById('1705.08492v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1705.08492v1-abstract-full" style="display: none;"> Randomized experiments have been used to assist decision-making in many areas. They help people select the optimal treatment for the test population with certain statistical guarantee. However, subjects can show significant heterogeneity in response to treatments. The problem of customizing treatment assignment based on subject characteristics is known as uplift modeling, differential response analysis, or personalized treatment learning in literature. A key feature for uplift modeling is that the data is unlabeled. It is impossible to know whether the chosen treatment is optimal for an individual subject because response under alternative treatments is unobserved. This presents a challenge to both the training and the evaluation of uplift models. In this paper we describe how to obtain an unbiased estimate of the key performance metric of an uplift model, the expected response. We present a new uplift algorithm which creates a forest of randomized trees. The trees are built with a splitting criterion designed to directly optimize their uplift performance based on the proposed evaluation method. Both the evaluation method and the algorithm apply to arbitrary number of treatments and general response types. Experimental results on synthetic data and industry-provided data show that our algorithm leads to significant performance improvement over other applicable methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.08492v1-abstract-full').style.display = 'none'; document.getElementById('1705.08492v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1704.00108">arXiv:1704.00108</a> <span> [<a href="https://arxiv.org/pdf/1704.00108">pdf</a>, <a href="https://arxiv.org/ps/1704.00108">ps</a>, <a href="https://arxiv.org/format/1704.00108">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Assortment Optimization under Unknown MultiNomial Logit Choice Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheung%2C+W+C">Wang Chi Cheung</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1704.00108v1-abstract-short" style="display: inline;"> Motivated by e-commerce, we study the online assortment optimization problem. The seller offers an assortment, i.e. a subset of products, to each arriving customer, who then purchases one or no product from her offered assortment. A customer's purchase decision is governed by the underlying MultiNomial Logit (MNL) choice model. The seller aims to maximize the total revenue in a finite sales horizo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.00108v1-abstract-full').style.display = 'inline'; document.getElementById('1704.00108v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1704.00108v1-abstract-full" style="display: none;"> Motivated by e-commerce, we study the online assortment optimization problem. The seller offers an assortment, i.e. a subset of products, to each arriving customer, who then purchases one or no product from her offered assortment. A customer's purchase decision is governed by the underlying MultiNomial Logit (MNL) choice model. The seller aims to maximize the total revenue in a finite sales horizon, subject to resource constraints and uncertainty in the MNL choice model. We first propose an efficient online policy which incurs a regret $\tilde{O}(T^{2/3})$, where $T$ is the number of customers in the sales horizon. Then, we propose a UCB policy that achieves a regret $\tilde{O}(T^{1/2})$. Both regret bounds are sublinear in the number of assortments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.00108v1-abstract-full').style.display = 'none'; document.getElementById('1704.00108v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1512.02300">arXiv:1512.02300</a> <span> [<a href="https://arxiv.org/pdf/1512.02300">pdf</a>, <a href="https://arxiv.org/ps/1512.02300">ps</a>, <a href="https://arxiv.org/format/1512.02300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Reaping the Benefits of Bundling under High Production Costs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Will Ma</a>, <a href="/search/cs?searchtype=author&query=Simchi-Levi%2C+D">David Simchi-Levi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1512.02300v4-abstract-short" style="display: inline;"> It is well-known that selling different goods in a single bundle can significantly increase revenue. However, bundling is no longer profitable if the goods have high production costs. To overcome this challenge, we introduce a new mechanism, Pure Bundling with Disposal for Cost (PBDC), where after buying the bundle, the customer is allowed to return any subset of goods for their costs. We provid… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.02300v4-abstract-full').style.display = 'inline'; document.getElementById('1512.02300v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1512.02300v4-abstract-full" style="display: none;"> It is well-known that selling different goods in a single bundle can significantly increase revenue. However, bundling is no longer profitable if the goods have high production costs. To overcome this challenge, we introduce a new mechanism, Pure Bundling with Disposal for Cost (PBDC), where after buying the bundle, the customer is allowed to return any subset of goods for their costs. We provide two types of guarantees on the profit of PBDC mechanisms relative to the optimum in the presence of production costs, under the assumption that customers have valuations which are additive over the items and drawn independently. We first provide a distribution-dependent guarantee which shows that PBDC earns at least 1-6c^{2/3} of the optimal profit, where c denotes the coefficient of variation of the welfare random variable. c approaches 0 if there are a large number of items whose individual valuations have bounded coefficients of variation, and our constants improve upon those from the classical result of Bakos and Brynjolfsson (1999) without costs. We then provide a distribution-free guarantee which shows that either PBDC or individual sales earns at least 1/5.2 times the optimal profit, generalizing and improving the constant of 1/6 from the celebrated result of Babaioff et al. (2014). Conversely, we also provide the best-known upper bound on the performance of any partitioning mechanism (which captures both individual sales and pure bundling), of 1/1.19 times the optimal profit, improving on the previously-known upper bound of 1/1.08. Finally, we conduct simulations under the same playing field as the extensive numerical study of Chu et al. (2011), which confirm that PBDC outperforms other simple pricing schemes overall. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.02300v4-abstract-full').style.display = 'none'; document.getElementById('1512.02300v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2015. </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>