Search | arXiv e-print repository

Showing 1–22 of 22 results for author: Wynter, L
Searching in archive cs. Search in all archives: /search/?searchtype=author&query=Wynter%2C+L
Results are sorted by announcement date (newest first), 50 results per page.

1. arXiv:2410.14753  [pdf, other]  cs.LG cs.AI cs.CL
   Collaboratively adding new knowledge to an LLM
   Authors: Rhui Dih Lee, Laura Wynter
   Abstract: We address the question of how to successively add new knowledge to an LLM whilst retaining previously-added knowledge. We consider two settings, semi-cooperative and fully-cooperative. Overall, LoRA performs better in most cases than full fine-tuning of all parameters when both new knowledge acquisition and retention of old, including recent, knowledge are taken into account. In the semi-cooperative setting, where datasets are not available after training, MOE mixing, model merging, and LoRA-based orthogonal subspace sequential learning, using a small weight on the orthogonality term, perform well. In the fully-cooperative setting where datasets remain available, joint training and sequential training with replay are both effective approaches, with LoRA training generally preferable to full fine-tuning. The code needed to reproduce the results is provided in an open source repository.
   Submitted 29 October, 2024; v1 submitted 18 October, 2024; originally announced October 2024.
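
The entry above compares LoRA with full fine-tuning for successively adding knowledge. As a rough, hedged illustration of what the LoRA side of that comparison involves (not the authors' code, which is in their referenced repository; the rank, scaling, and wrapped layer are illustrative assumptions), a minimal adapter in PyTorch:

```python
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Frozen pretrained linear layer plus a trainable low-rank update (LoRA-style sketch)."""
    def __init__(self, base: nn.Linear, rank: int = 8, alpha: float = 16.0):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad_(False)                 # the pretrained weights stay frozen
        self.lora_a = nn.Linear(base.in_features, rank, bias=False)
        self.lora_b = nn.Linear(rank, base.out_features, bias=False)
        nn.init.zeros_(self.lora_b.weight)          # the low-rank update starts at zero
        self.scale = alpha / rank

    def forward(self, x):
        return self.base(x) + self.scale * self.lora_b(self.lora_a(x))

# Only the low-rank factors are trained when a new dataset ("new knowledge") arrives.
layer = LoRALinear(nn.Linear(768, 768), rank=8)
trainable = [p for p in layer.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable, lr=1e-4)
```

Sequential variants like those in the abstract (replay, orthogonal-subspace training) mainly change which data each round sees and how successive low-rank updates are constrained.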

2. arXiv:2408.17280  [pdf, other]  cs.AI cs.CL
   Flexible and Effective Mixing of Large Language Models into a Mixture of Domain Experts
   Authors: Rhui Dih Lee, Laura Wynter, Raghu Kiran Ganti
   Abstract: We present a toolkit for creating low-cost Mixture-of-Domain-Experts (MOE) from trained models. The toolkit can be used for creating a mixture from models or from adapters. We perform extensive tests and offer guidance on defining the architecture of the resulting MOE using the toolkit. A public repository is available.
   Submitted 10 September, 2024; v1 submitted 30 August, 2024; originally announced August 2024.
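
The toolkit itself is in the public repository the entry mentions; the snippet below is only a hedged sketch of the general idea of mixing pre-trained experts under a learned gate. The top-1 token routing, expert granularity, and dimensions are assumptions for illustration, not the toolkit's design.

```python
import torch
import torch.nn as nn

class DomainExpertMixture(nn.Module):
    """Learned top-1 gate over frozen, independently trained expert blocks (illustrative only)."""
    def __init__(self, experts, hidden_dim):
        super().__init__()
        self.experts = nn.ModuleList(experts)
        for p in self.experts.parameters():
            p.requires_grad_(False)                 # experts arrive pre-trained; only the gate learns
        self.gate = nn.Linear(hidden_dim, len(experts))

    def forward(self, x):                           # x: (batch, seq, hidden)
        choice = self.gate(x).argmax(dim=-1)        # pick one expert per token
        out = torch.zeros_like(x)
        for i, expert in enumerate(self.experts):   # dense compute, sparse selection (fine for a sketch)
            out = out + (choice == i).unsqueeze(-1) * expert(x)
        return out

experts = [nn.Sequential(nn.Linear(768, 3072), nn.GELU(), nn.Linear(3072, 768)) for _ in range(3)]
mixture = DomainExpertMixture(experts, hidden_dim=768)
y = mixture(torch.randn(2, 16, 768))                # (2, 16, 768)
```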

3. arXiv:2407.09105  [pdf, other]  cs.LG cs.AI
   Enhancing Training Efficiency Using Packing with Flash Attention
   Authors: Achintya Kundu, Rhui Dih Lee, Laura Wynter, Raghu Kiran Ganti, Mayank Mishra
   Abstract: Padding is often used in tuning LLMs by adding special tokens to shorter training examples to match the length of the longest sequence in each batch. While this ensures uniformity for batch processing, it introduces inefficiencies by including irrelevant padding tokens in the computation and wastes GPU resources. The Hugging Face SFT trainer has always offered the option to use packing to combine multiple training examples, allowing for maximal utilization of GPU resources. However, until now, it did not offer proper masking of each packed training example. This capability has been added to Hugging Face Transformers 4.44. We analyse this new feature and show the benefits across different variations of packing.
   Submitted 31 August, 2024; v1 submitted 12 July, 2024; originally announced July 2024.
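
A hedged sketch of the packing idea discussed in this entry: concatenate tokenized examples into fixed-length sequences and record per-example offsets so that an attention mask can keep examples from attending to each other. The offset format below is an assumption for illustration, not the Hugging Face interface the paper analyses.

```python
from typing import Iterable

def pack_examples(examples: Iterable[list[int]], max_len: int, pad_id: int = 0):
    """Greedily pack tokenized examples into sequences of at most max_len tokens.

    Returns (packed_sequences, boundaries) where boundaries[i] holds the cumulative
    offsets of the examples inside packed_sequences[i]; a mask built from these
    offsets keeps attention from crossing example boundaries.
    """
    packed, boundaries = [], []
    cur, offsets = [], [0]
    for tokens in examples:
        tokens = tokens[:max_len]                 # truncate overly long examples
        if len(cur) + len(tokens) > max_len:      # current pack is full: flush it
            packed.append(cur + [pad_id] * (max_len - len(cur)))
            boundaries.append(offsets)
            cur, offsets = [], [0]
        cur.extend(tokens)
        offsets.append(len(cur))
    if cur:
        packed.append(cur + [pad_id] * (max_len - len(cur)))
        boundaries.append(offsets)
    return packed, boundaries

seqs, bounds = pack_examples([[1, 2, 3], [4, 5], [6, 7, 8, 9]], max_len=6)
# seqs   -> [[1, 2, 3, 4, 5, 0], [6, 7, 8, 9, 0, 0]]
# bounds -> [[0, 3, 5], [0, 4]]
```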

4. arXiv:2404.01353  [pdf, other]  cs.LG cs.AI cs.CL
   Efficiently Distilling LLMs for Edge Applications
   Authors: Achintya Kundu, Fabian Lim, Aaron Chew, Laura Wynter, Penny Chong, Rhui Dih Lee
   Abstract: Supernet training of LLMs is of great interest in industrial applications as it confers the ability to produce a palette of smaller models at constant cost, regardless of the number of models (of different size / latency) produced. We propose a new method called Multistage Low-rank Fine-tuning of Super-transformers (MLFS) for parameter-efficient supernet training. We show that it is possible to obtain high-quality encoder models that are suitable for commercial edge applications, and that while decoder-only models are resistant to a comparable degree of compression, decoders can be effectively sliced for a significant reduction in training time.
   Submitted 1 April, 2024; originally announced April 2024.
   Comments: This paper has been accepted for publication in NAACL 2024 (Industry Track)

5. arXiv:2303.15485  [pdf, other]  cs.LG cs.CV cs.NE
   Transfer-Once-For-All: AI Model Optimization for Edge
   Authors: Achintya Kundu, Laura Wynter, Rhui Dih Lee, Luis Angel Bathen
   Abstract: Weight-sharing neural architecture search aims to optimize a configurable neural network model (supernet) for a variety of deployment scenarios across many devices with different resource constraints. Existing approaches use evolutionary search to extract models of different sizes from a supernet trained on a very large data set, and then fine-tune the extracted models on the typically small, real-world data set of interest. The computational cost of training thus grows linearly with the number of different model deployment scenarios. Hence, we propose Transfer-Once-For-All (TOFA) for supernet-style training on small data sets with constant computational training cost over any number of edge deployment scenarios. Given a task, TOFA obtains custom neural networks, both the topology and the weights, optimized for any number of edge deployment scenarios. To overcome the challenges arising from small data, TOFA utilizes a unified semi-supervised training loss to simultaneously train all subnets within the supernet, coupled with on-the-fly architecture selection at deployment time.
   Submitted 2 July, 2023; v1 submitted 27 March, 2023; originally announced March 2023.
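
The two preceding entries both rely on weight-sharing supernets from which smaller subnetworks are extracted for edge deployment. Purely as a hypothetical illustration of what "extracting" a subnet can mean (the slicing rule, dimensions, and selection procedure are assumptions, not MLFS or TOFA):

```python
import torch
import torch.nn as nn

def slice_linear(super_layer: nn.Linear, out_width: int, in_width: int) -> nn.Linear:
    """Extract a narrower linear layer that copies the leading weights of a shared 'super' layer."""
    sub = nn.Linear(in_width, out_width, bias=super_layer.bias is not None)
    with torch.no_grad():
        sub.weight.copy_(super_layer.weight[:out_width, :in_width])
        if super_layer.bias is not None:
            sub.bias.copy_(super_layer.bias[:out_width])
    return sub

super_layer = nn.Linear(1024, 1024)                                   # widest trained configuration
edge_layer = slice_linear(super_layer, out_width=256, in_width=1024)  # a smaller deployment choice
```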

6. arXiv:2202.13436  [pdf, other]  cs.LG cs.AI
   Neural-Progressive Hedging: Enforcing Constraints in Reinforcement Learning with Stochastic Programming
   Authors: Supriyo Ghosh, Laura Wynter, Shiau Hong Lim, Duc Thien Nguyen
   Abstract: We propose a framework, called neural-progressive hedging (NP), that leverages stochastic programming during the online phase of executing a reinforcement learning (RL) policy. The goal is to ensure feasibility with respect to constraints and risk-based objectives such as conditional value-at-risk (CVaR) during the execution of the policy, using probabilistic models of the state transitions to guide policy adjustments. The framework is particularly amenable to the class of sequential resource allocation problems, since feasibility with respect to typical resource constraints cannot be enforced in a scalable manner. The NP framework provides an alternative that adds modest overhead during the online phase. Experimental results demonstrate the efficacy of the NP framework on two continuous real-world tasks: (i) the portfolio optimization problem with liquidity constraints for financial planning, characterized by non-stationary state distributions; and (ii) the dynamic repositioning problem in bike sharing systems, which embodies the class of supply-demand matching problems. We show that the NP framework produces policies that are better than deep RL and other baseline approaches, adapting to non-stationarity, whilst satisfying structural constraints and accommodating risk measures in the resulting policies. Additional benefits of the NP framework are ease of implementation and better explainability of the policies.
   Submitted 27 February, 2022; originally announced February 2022.
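
The NP framework solves an online stochastic program to keep the RL policy feasible and risk-aware. The sketch below shows only the simplest stand-in for that step, a cheap feasibility repair of a proposed resource allocation; the budget, the repair rule, and the policy output are illustrative assumptions.

```python
import numpy as np

def repair_allocation(action: np.ndarray, budget: float) -> np.ndarray:
    """Cheap feasibility repair for a proposed allocation: nonnegative and within budget.

    This stands in for the paper's online stochastic program, which additionally
    accounts for risk (CVaR) and probabilistic models of future state transitions.
    """
    x = np.clip(action, 0.0, None)        # enforce nonnegativity
    total = x.sum()
    if total > budget and total > 0:      # scale back if the budget is exceeded
        x = x * (budget / total)
    return x

proposed = np.array([0.6, -0.1, 0.9])                # hypothetical RL policy output
executed = repair_allocation(proposed, budget=1.0)   # -> array([0.4, 0. , 0.6])
```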

7. arXiv:2110.07275  [pdf, other]  cs.LG cs.AI
   Order Constraints in Optimal Transport
   Authors: Fabian Lim, Laura Wynter, Shiau Hong Lim
   Abstract: Optimal transport is a framework for comparing measures whereby a cost is incurred for transporting one measure to another. Recent works have aimed to improve optimal transport plans through the introduction of various forms of structure. We introduce novel order constraints into the optimal transport formulation to allow for the incorporation of structure. We define an efficient method for obtaining explainable solutions to the new formulation that scales far better than standard approaches. The theoretical properties of the method are provided. We demonstrate experimentally that order constraints improve explainability using the e-SNLI (Stanford Natural Language Inference) dataset, which includes human-annotated rationales, as well as on several image color transfer examples.
   Submitted 28 June, 2022; v1 submitted 14 October, 2021; originally announced October 2021.
   Comments: To appear in Proceedings of ICML 2022. Main Paper + Supplementary
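
For intuition about constraining transport plans, here is a hedged sketch of entropic optimal transport in which structurally disallowed pairings are masked out of the Sinkhorn kernel. This masking is a simple stand-in, not the paper's order constraints or its more scalable solver.

```python
import numpy as np

def masked_sinkhorn(cost, a, b, allowed, eps=0.1, iters=200):
    """Entropic OT with hard structural constraints: disallowed pairings get zero mass.

    `allowed` is a boolean matrix over (source, target) pairs; feasibility of the
    marginals under the mask is assumed.
    """
    K = np.exp(-cost / eps) * allowed            # zero out forbidden entries of the kernel
    u = np.ones_like(a)
    v = np.ones_like(b)
    for _ in range(iters):
        u = a / (K @ v + 1e-12)
        v = b / (K.T @ u + 1e-12)
    return u[:, None] * K * v[None, :]           # transport plan

cost = np.array([[0.0, 1.0], [1.0, 0.0]])
allowed = np.array([[True, True], [False, True]])   # e.g. forbid sending mass from source 2 to target 1
plan = masked_sinkhorn(cost, np.array([0.5, 0.5]), np.array([0.5, 0.5]), allowed)
```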

8. arXiv:2102.09745  [pdf, other]  cs.LG
   Decentralized Deterministic Multi-Agent Reinforcement Learning
   Authors: Antoine Grosnit, Desmond Cai, Laura Wynter
   Abstract: [Zhang, ICML 2018] provided the first decentralized actor-critic algorithm for multi-agent reinforcement learning (MARL) that offers convergence guarantees. In that work, policies are stochastic and are defined on finite action spaces. We extend those results to offer a provably-convergent decentralized actor-critic algorithm for learning deterministic policies on continuous action spaces. Deterministic policies are important in real-world settings. To handle the lack of exploration inherent in deterministic policies, we consider both off-policy and on-policy settings. We provide the expression of a local deterministic policy gradient, decentralized deterministic actor-critic algorithms and convergence guarantees for linearly-approximated value functions. This work will help enable decentralized MARL in high-dimensional action spaces and pave the way for more widespread use of MARL.
   Submitted 19 February, 2021; originally announced February 2021.

9. arXiv:2102.09361  [pdf, other]  cs.LG cs.AI
   Efficient Reinforcement Learning in Resource Allocation Problems Through Permutation Invariant Multi-task Learning
   Authors: Desmond Cai, Shiau Hong Lim, Laura Wynter
   Abstract: One of the main challenges in real-world reinforcement learning is to learn successfully from limited training samples. We show that in certain settings, the available data can be dramatically increased through a form of multi-task learning, by exploiting an invariance property in the tasks. We provide a theoretical performance bound for the gain in sample efficiency under this setting. This motivates a new approach to multi-task learning, which involves the design of an appropriate neural network architecture and a prioritized task-sampling strategy. We demonstrate empirically the effectiveness of the proposed approach on two real-world sequential resource allocation tasks where this invariance property occurs: financial portfolio optimization and meta federated learning.
   Submitted 18 February, 2021; originally announced February 2021.
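
The entry above motivates a neural architecture that respects an invariance of the tasks. A generic example of a permutation-invariant design (deep-sets style pooling over entities such as assets or clients; not necessarily the architecture used in the paper):

```python
import torch
import torch.nn as nn

class PermutationInvariantEncoder(nn.Module):
    """Shared per-entity network followed by symmetric pooling, so the output is
    unchanged under any reordering of the entities (e.g. assets, clients)."""
    def __init__(self, entity_dim: int, hidden: int = 64):
        super().__init__()
        self.phi = nn.Sequential(nn.Linear(entity_dim, hidden), nn.ReLU(), nn.Linear(hidden, hidden))
        self.rho = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))

    def forward(self, entities):                 # entities: (batch, n_entities, entity_dim)
        pooled = self.phi(entities).mean(dim=1)  # mean pooling is order-independent
        return self.rho(pooled)

enc = PermutationInvariantEncoder(entity_dim=4)
x = torch.randn(2, 5, 4)
perm = x[:, torch.randperm(5), :]
assert torch.allclose(enc(x), enc(perm), atol=1e-5)   # same output under reordering
```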

10. arXiv:2101.06171  [pdf, other]  cs.LG cs.AI cs.DC
   Probabilistic Inference for Learning from Untrusted Sources
   Authors: Duc Thien Nguyen, Shiau Hoong Lim, Laura Wynter, Desmond Cai
   Abstract: Federated learning brings potential benefits of faster learning, better solutions, and a greater propensity to transfer when heterogeneous data from different parties increases diversity. However, because federated learning tasks tend to be large and complex, and training times non-negligible, it is important for the aggregation algorithm to be robust to non-IID data and corrupted parties. This robustness relies on the ability to identify, and appropriately weight, incompatible parties. Recent work assumes that a reference dataset is available through which to perform the identification. We consider settings where no such reference dataset is available; rather, the quality and suitability of the parties need to be inferred. We do so by bringing ideas from crowdsourced predictions and collaborative filtering, where one must infer an unknown ground truth given proposals from participants with unknown quality. We propose novel federated learning aggregation algorithms based on Bayesian inference that adapt to the quality of the parties. Empirically, we show that the algorithms outperform standard and robust aggregation in federated learning on both synthetic and real data.
   Submitted 15 January, 2021; originally announced January 2021.

11. arXiv:2009.06303  [pdf, other]  cs.LG cs.DC math.OC stat.ML
   Robustness and Personalization in Federated Learning: A Unified Approach via Regularization
   Authors: Achintya Kundu, Pengqian Yu, Laura Wynter, Shiau Hong Lim
   Abstract: We present a class of methods for robust, personalized federated learning, called Fed+, that unifies many federated learning algorithms. The principal advantage of this class of methods is to better accommodate the real-world characteristics found in federated training, such as the lack of IID data across parties, the need for robustness to outliers or stragglers, and the requirement to perform well on party-specific datasets. We achieve this through a problem formulation that allows the central server to employ robust ways of aggregating the local models while keeping the structure of local computation intact. Without making any statistical assumption on the degree of heterogeneity of local data across parties, we provide convergence guarantees for Fed+ for convex and non-convex loss functions under different (robust) aggregation methods. The Fed+ theory is also equipped to handle heterogeneous computing environments, including stragglers, without additional assumptions; specifically, the convergence results cover the general setting where the number of local update steps across parties can vary. We demonstrate the benefits of Fed+ through extensive experiments across standard benchmark datasets.
   Submitted 12 July, 2022; v1 submitted 14 September, 2020; originally announced September 2020.
   Comments: Accepted by IEEE EDGE 2022 (16 pages, 4 figures, 2 tables)
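
A hedged sketch of the two ingredients the abstract describes: a robust server-side aggregation (here a coordinate-wise median, one of several admissible choices) and a local update whose regularization pulls each party toward the central point. The update rule and the weight `mu` are illustrative, not the paper's exact formulation.

```python
import torch

def robust_aggregate(party_weights):
    """Coordinate-wise median of party models: one robust aggregation choice."""
    stacked = torch.stack(party_weights)          # (n_parties, dim)
    return stacked.median(dim=0).values

def local_step(w_local, grad, w_central, lr=0.1, mu=0.5):
    """Gradient step on the party's own loss plus a pull toward the central point.

    The proximal weight mu trades personalization (small mu) against consensus (large mu);
    this is an illustrative update, not the paper's formulation.
    """
    return w_local - lr * (grad + mu * (w_local - w_central))

parties = [torch.tensor([1.0, 2.0]), torch.tensor([1.1, 1.9]), torch.tensor([9.0, -5.0])]  # one outlier
center = robust_aggregate(parties)                # tensor([1.1, 1.9]); the outlier has little influence
w_new = local_step(parties[0], grad=torch.tensor([0.1, -0.2]), w_central=center)
```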

12. arXiv:2007.10987  [pdf, other]  cs.LG cs.CR cs.DC
   IBM Federated Learning: an Enterprise Framework White Paper V0.1
   Authors: Heiko Ludwig, Nathalie Baracaldo, Gegi Thomas, Yi Zhou, Ali Anwar, Shashank Rajamoni, Yuya Ong, Jayaram Radhakrishnan, Ashish Verma, Mathieu Sinn, Mark Purcell, Ambrish Rawat, Tran Minh, Naoise Holohan, Supriyo Chakraborty, Shalisha Whitherspoon, Dean Steuer, Laura Wynter, Hifaz Hassan, Sean Laguna, Mikhail Yurochkin, Mayank Agarwal, Ebube Chuba, Annie Abay
   Abstract: Federated Learning (FL) is an approach to conduct machine learning without centralizing training data in a single place, for reasons of privacy, confidentiality or data volume. However, solving federated machine learning problems raises issues above and beyond those of centralized machine learning. These issues include setting up communication infrastructure between parties, coordinating the learning process, integrating party results, understanding the characteristics of the training data sets of different participating parties, handling data heterogeneity, and operating with the absence of a verification data set. IBM Federated Learning provides infrastructure and coordination for federated learning. Data scientists can design and run federated learning jobs based on existing, centralized machine learning models and can provide high-level instructions on how to run the federation. The framework applies to both Deep Neural Networks and "traditional" approaches for the most common machine learning libraries. IBM Federated Learning enables data scientists to expand their scope from centralized to federated machine learning, minimizing the learning curve at the outset while also providing the flexibility to deploy to different compute environments and design custom fusion algorithms.
   Submitted 22 July, 2020; originally announced July 2020.
   Comments: 17 pages
   ACM Class: I.2.6; I.2.11

13. arXiv:2006.00778  [pdf, other]  cs.LG cs.AI stat.ML
   Variational Bayesian Inference for Crowdsourcing Predictions
   Authors: Desmond Cai, Duc Thien Nguyen, Shiau Hong Lim, Laura Wynter
   Abstract: Crowdsourcing has emerged as an effective means for performing a number of machine learning tasks such as annotation and labelling of images and other data sets. In most early settings of crowdsourcing, the task involved classification, that is, assigning one of a discrete set of labels to each task. Recently, however, more complex tasks have been attempted, including asking crowdsource workers to assign continuous labels, or predictions. In essence, this involves the use of crowdsourcing for function estimation. We are motivated by this problem to drive applications such as collaborative prediction, that is, harnessing the wisdom of the crowd to predict quantities more accurately. To do so, we propose a Bayesian approach aimed specifically at alleviating overfitting, a typical impediment to accurate prediction models in practice. In particular, we develop a variational Bayesian technique for two different worker noise models: one that assumes workers' noises are independent and the other that assumes workers' noises have a latent low-rank structure. Our evaluations on synthetic and real-world datasets demonstrate that these Bayesian approaches perform significantly better than existing non-Bayesian approaches and are thus potentially useful for this class of crowdsourcing problems.
   Submitted 1 June, 2020; v1 submitted 1 June, 2020; originally announced June 2020.
   Comments: 7 pages
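
As a rough stand-in for the variational treatment described above, the sketch below combines continuous worker predictions by iteratively estimating each worker's noise and precision-weighting accordingly. It covers only the independent-noise case; the low-rank noise model and the Bayesian machinery are beyond this illustration.

```python
import numpy as np

def combine_predictions(preds, iters=20):
    """Iteratively estimate per-worker noise and precision-weight their predictions.

    preds: (n_workers, n_items) continuous predictions. A simple EM-flavored stand-in
    for the paper's variational Bayesian approach.
    """
    estimate = preds.mean(axis=0)                             # start from the plain average
    for _ in range(iters):
        var = ((preds - estimate) ** 2).mean(axis=1) + 1e-6   # per-worker noise variance
        w = (1.0 / var) / (1.0 / var).sum()                   # precision weights
        estimate = w @ preds                                  # reweighted consensus
    return estimate, w

preds = np.array([[1.0, 2.0, 3.0],
                  [1.1, 2.1, 2.9],
                  [5.0, -1.0, 7.0]])                     # third worker is noisy
consensus, weights = combine_predictions(preds)          # the noisy worker gets a small weight
```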

arXiv:2006.00778 [pdf, other] cs.LG cs.AI stat.ML
Variational Bayesian Inference for Crowdsourcing Predictions
Authors: Desmond Cai, Duc Thien Nguyen, Shiau Hong Lim, Laura Wynter
Abstract: Crowdsourcing has emerged as an effective means for performing a number of machine learning tasks such as annotation and labelling of images and other data sets. In most early settings of crowdsourcing, the task involved classification, that is, assigning one of a discrete set of labels to each task. Recently, however, more complex tasks have been attempted, including asking crowdsource workers to assign continuous labels, or predictions. In essence, this involves the use of crowdsourcing for function estimation. We are motivated by this problem to drive applications such as collaborative prediction, that is, harnessing the wisdom of the crowd to predict quantities more accurately. To do so, we propose a Bayesian approach aimed specifically at alleviating overfitting, a typical impediment to accurate prediction models in practice. In particular, we develop a variational Bayesian technique for two different worker noise models: one that assumes workers' noises are independent and the other that assumes workers' noises have a latent low-rank structure. Our evaluations on synthetic and real-world datasets demonstrate that these Bayesian approaches perform significantly better than existing non-Bayesian approaches and are thus potentially useful for this class of crowdsourcing problems.
Submitted 1 June, 2020; v1 submitted 1 June, 2020; originally announced June 2020.
Comments: 7 pages
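
The two worker noise models named above can be made concrete with a toy simulation. The snippet below only generates data under each model and applies a simple precision-weighted aggregate as a baseline; it does not reproduce the paper's variational Bayesian inference, and all sizes and scales are assumptions.

```python
# Toy illustration of the two worker noise models (not the paper's VB algorithm).
import numpy as np

rng = np.random.default_rng(0)
n_workers, n_tasks, rank = 5, 200, 2
truth = rng.normal(size=n_tasks)                      # latent quantities to predict

# Model 1: independent worker noise, y[i, j] = truth[j] + eps[i, j].
sigma = rng.uniform(0.2, 1.0, size=n_workers)
y_indep = truth + rng.normal(scale=sigma[:, None], size=(n_workers, n_tasks))

# Model 2: low-rank correlated worker noise, eps = W @ z + small residual.
W = rng.normal(scale=0.7, size=(n_workers, rank))
z = rng.normal(size=(rank, n_tasks))
y_lowrank = truth + W @ z + rng.normal(scale=0.1, size=(n_workers, n_tasks))

# A simple precision-weighted aggregate (a non-Bayesian baseline for comparison).
w = 1.0 / sigma**2
agg = (w[:, None] * y_indep).sum(axis=0) / w.sum()
print("independent-noise aggregate RMSE:", np.sqrt(np.mean((agg - truth) ** 2)))
```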

arXiv:2004.01387 [pdf, other] cs.LG cs.AI
A Deep Ensemble Multi-Agent Reinforcement Learning Approach for Air Traffic Control
Authors: Supriyo Ghosh, Sean Laguna, Shiau Hong Lim, Laura Wynter, Hasan Poonawala
Abstract: Air traffic control is an example of a highly challenging operational problem that is readily amenable to human expertise augmentation via decision support technologies. In this paper, we propose a new intelligent decision making framework that leverages multi-agent reinforcement learning (MARL) to dynamically suggest adjustments of aircraft speeds in real-time. The goal of the system is to enhance the ability of an air traffic controller to provide effective guidance to aircraft to avoid air traffic congestion and near-miss situations, and to improve arrival timeliness. We develop a novel deep ensemble MARL method that can concisely capture the complexity of the air traffic control problem by learning to efficiently arbitrate between the decisions of a local kernel-based RL model and a wider-reaching deep MARL model. The proposed method is trained and evaluated on an open-source air traffic management simulator developed by Eurocontrol. Extensive empirical results on a real-world dataset including thousands of aircraft demonstrate the feasibility of using multi-agent RL for the problem of en-route air traffic control and show that our proposed deep ensemble MARL method significantly outperforms three state-of-the-art benchmark approaches.
Submitted 3 April, 2020; originally announced April 2020.
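
A schematic of the arbitration idea, under assumed interfaces: each model proposes a speed adjustment with a confidence score, and a gate picks between them. The paper learns this arbitration; the fixed rule and the toy policies below are placeholders only.

```python
# Schematic arbitration between a local kernel-based policy and a deep MARL policy
# (illustrative gate; the paper learns the arbitration rather than fixing a rule).
from typing import Callable, Dict, Tuple

# Assumed interface: a policy maps an aircraft state to (speed_adjustment, confidence).
Policy = Callable[[Dict], Tuple[float, float]]

def arbitrate(state: Dict, local_policy: Policy, deep_policy: Policy) -> float:
    a_local, c_local = local_policy(state)
    a_deep, c_deep = deep_policy(state)
    # Pick the more confident suggestion; a learned gate would replace this rule.
    return a_local if c_local >= c_deep else a_deep

# Toy policies for demonstration only.
local = lambda s: (-0.05 * s["headway_error"], 0.9 if s["neighbors"] <= 2 else 0.3)
deep = lambda s: (-0.02 * s["headway_error"] - 0.01 * s["neighbors"], 0.6)

state = {"headway_error": 12.0, "neighbors": 5}
print("suggested speed adjustment:", arbitrate(state, local, deep))
```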
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.03040">arXiv:1906.03040</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.03040">pdf</a>, <a href="https://arxiv.org/format/1906.03040">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> FASTER: Fusion AnalyticS for public Transport Event Response </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Blandin%2C+S">Sebastien Blandin</a>, <a href="/search/cs?searchtype=author&amp;query=Wynter%2C+L">Laura Wynter</a>, <a href="/search/cs?searchtype=author&amp;query=Poonawala%2C+H">Hasan Poonawala</a>, <a href="/search/cs?searchtype=author&amp;query=Laguna%2C+S">Sean Laguna</a>, <a href="/search/cs?searchtype=author&amp;query=Dura%2C+B">Basile Dura</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.03040v1-abstract-short" style="display: inline;"> Increasing urban concentration raises operational challenges that can benefit from integrated monitoring and decision support. Such complex systems need to leverage the full stack of analytical methods, from state estimation using multi-sensor fusion for situational awareness, to prediction and computation of optimal responses. The FASTER platform that we describe in this work, deployed at nation&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.03040v1-abstract-full').style.display = 'inline'; document.getElementById('1906.03040v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.03040v1-abstract-full" style="display: none;"> Increasing urban concentration raises operational challenges that can benefit from integrated monitoring and decision support. Such complex systems need to leverage the full stack of analytical methods, from state estimation using multi-sensor fusion for situational awareness, to prediction and computation of optimal responses. The FASTER platform that we describe in this work, deployed at nation scale and handling 1.5 billion public transport trips a year, offers such a full stack of techniques for this large-scale, real-time problem. FASTER provides fine-grained situational awareness and real-time decision support with the objective of improving the public transport commuter experience. The methods employed range from statistical machine learning to agent-based simulation and mixed-integer optimization. In this work we present an overview of the challenges and methods involved, with details of the commuter movement prediction module, as well as a discussion of open problems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.03040v1-abstract-full').style.display = 'none'; document.getElementById('1906.03040v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.10180">arXiv:1904.10180</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.10180">pdf</a>, <a href="https://arxiv.org/format/1904.10180">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> High-frequency crowd insights for public safety and congestion control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nandakumar%2C+K">Karthik Nandakumar</a>, <a href="/search/cs?searchtype=author&amp;query=Blandin%2C+S">Sebastien Blandin</a>, <a href="/search/cs?searchtype=author&amp;query=Wynter%2C+L">Laura Wynter</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.10180v1-abstract-short" style="display: inline;"> We present results from several projects aimed at enabling the real-time understanding of crowds and their behaviour in the built environment. We make use of CCTV video cameras that are ubiquitous throughout the developed and developing world and as such are able to play the role of a reliable sensing mechanism. We outline the novel methods developed for our crowd insights engine, and illustrate e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.10180v1-abstract-full').style.display = 'inline'; document.getElementById('1904.10180v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.10180v1-abstract-full" style="display: none;"> We present results from several projects aimed at enabling the real-time understanding of crowds and their behaviour in the built environment. We make use of CCTV video cameras that are ubiquitous throughout the developed and developing world and as such are able to play the role of a reliable sensing mechanism. We outline the novel methods developed for our crowd insights engine, and illustrate examples of its use in different contexts in the urban landscape. Applications of the technology range from maintaining security in public spaces to quantifying the adequacy of public transport level of service. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.10180v1-abstract-full').style.display = 'none'; document.getElementById('1904.10180v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.01045">arXiv:1903.01045</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1903.01045">pdf</a>, <a href="https://arxiv.org/format/1903.01045">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Robust commuter movement inference from connected mobile devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+B">Baoyang Song</a>, <a href="/search/cs?searchtype=author&amp;query=Poonawala%2C+H">Hasan Poonawala</a>, <a href="/search/cs?searchtype=author&amp;query=Wynter%2C+L">Laura Wynter</a>, <a href="/search/cs?searchtype=author&amp;query=Blandin%2C+S">Sebastien Blandin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.01045v1-abstract-short" style="display: inline;"> The preponderance of connected devices provides unprecedented opportunities for fine-grained monitoring of the public infrastructure. However while classical models expect high quality application-specific data streams, the promise of the Internet of Things (IoT) is that of an abundance of disparate and noisy datasets from connected devices. In this context, we consider the problem of estimation o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.01045v1-abstract-full').style.display = 'inline'; document.getElementById('1903.01045v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.01045v1-abstract-full" style="display: none;"> The preponderance of connected devices provides unprecedented opportunities for fine-grained monitoring of the public infrastructure. However while classical models expect high quality application-specific data streams, the promise of the Internet of Things (IoT) is that of an abundance of disparate and noisy datasets from connected devices. In this context, we consider the problem of estimation of the level of service of a city-wide public transport network. We first propose a robust unsupervised model for train movement inference from wifi traces, via the application of robust clustering methods to a one dimensional spatio-temporal setting. We then explore the extent to which the demand-supply gap can be estimated from connected devices. We propose a classification model of real-time commuter patterns, including both a batch training phase and an online learning component. We describe our deployment architecture and assess our system accuracy on a large-scale anonymized dataset comprising more than 10 billion records. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.01045v1-abstract-full').style.display = 'none'; document.getElementById('1903.01045v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">International Conference on Data Mining 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1902.10887">arXiv:1902.10887</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1902.10887">pdf</a>, <a href="https://arxiv.org/format/1902.10887">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust ResNet: A Small Step but A Giant Leap </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingfeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+B">Bo Han</a>, <a href="/search/cs?searchtype=author&amp;query=Wynter%2C+L">Laura Wynter</a>, <a href="/search/cs?searchtype=author&amp;query=Low%2C+K+H">Kian Hsiang Low</a>, <a href="/search/cs?searchtype=author&amp;query=Kankanhalli%2C+M">Mohan Kankanhalli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1902.10887v3-abstract-short" style="display: inline;"> This paper presents a simple yet principled approach to boosting the robustness of the residual network (ResNet) that is motivated by the dynamical system perspective. Namely, a deep neural network can be interpreted using a partial differential equation, which naturally inspires us to characterize ResNet by an explicit Euler method. Our analytical studies reveal that the step factor h in the Eule&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.10887v3-abstract-full').style.display = 'inline'; document.getElementById('1902.10887v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1902.10887v3-abstract-full" style="display: none;"> This paper presents a simple yet principled approach to boosting the robustness of the residual network (ResNet) that is motivated by the dynamical system perspective. Namely, a deep neural network can be interpreted using a partial differential equation, which naturally inspires us to characterize ResNet by an explicit Euler method. Our analytical studies reveal that the step factor h in the Euler method is able to control the robustness of ResNet in both its training and generalization. Specifically, we prove that a small step factor h can benefit the training robustness for back-propagation; from the view of forward-propagation, a small h can aid in the robustness of the model generalization. 

arXiv:1812.05451 [pdf, other] cs.CR cs.AI cs.LG stat.ML
A Probabilistic Model of the Bitcoin Blockchain
Authors: Marc Jourdan, Sebastien Blandin, Laura Wynter, Pralhad Deshpande
Abstract: The Bitcoin transaction graph is a public data structure organized as transactions between addresses, each associated with a logical entity. In this work, we introduce a complete probabilistic model of the Bitcoin Blockchain. We first formulate a set of conditional dependencies induced by the Bitcoin protocol at the block level and derive a corresponding fully observed graphical model of a Bitcoin block. We then extend the model to include hidden entity attributes such as the functional category of the associated logical agent and derive asymptotic bounds on the privacy properties implied by this model. At the network level, we show evidence of complex transaction-to-transaction behavior and present a relevant discriminative model of the agent categories. Performance of both the block-based graphical model and the network-level discriminative model is evaluated on a subset of the public Bitcoin Blockchain.
Submitted 6 November, 2018; originally announced December 2018.

arXiv:1810.11956 [pdf, other] cs.CR cs.LG
Characterizing Entities in the Bitcoin Blockchain
Authors: Marc Jourdan, Sebastien Blandin, Laura Wynter, Pralhad Deshpande
Abstract: Bitcoin has created a new exchange paradigm within which financial transactions can be trusted without an intermediary. This premise of a free decentralized transactional network, however, requires, in its current implementation, unrestricted access to the ledger for peer-based transaction verification. A number of studies have shown that, in this pseudonymous context, identities can be leaked based on transaction features or off-network information. In this work, we analyze the information revealed by the pattern of transactions in the neighborhood of a given entity transaction. By definition, these features, which pertain to an extended network, are not directly controllable by the entity, but might enable leakage of information about transacting entities. We define a number of new features relevant to entity characterization on the Bitcoin Blockchain and study their efficacy in practice. We show that even a weak attacker with shallow data mining knowledge is able to leverage these features to characterize the entity properties.
Submitted 29 October, 2018; originally announced October 2018.
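
The kind of neighborhood features and off-the-shelf classifier a weak attacker might use can be sketched as follows. The feature set, the synthetic entities, and the labels are purely illustrative and are not the features defined in the paper.

```python
# Illustrative neighborhood features for entity characterization (not the paper's
# feature set): degree and value statistics over an entity's surrounding transactions.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)

def entity_features(n_in, n_out, values):
    values = np.asarray(values, dtype=float)
    return [n_in, n_out, n_in / max(n_out, 1), values.mean(), values.std(), len(values)]

# Synthetic "exchange-like" vs "personal-wallet-like" entities (toy labels).
X, y = [], []
for _ in range(200):
    exchange = rng.random() < 0.5
    n_in = rng.poisson(40 if exchange else 3) + 1
    n_out = rng.poisson(35 if exchange else 2) + 1
    vals = rng.lognormal(mean=1.5 if exchange else 0.0, sigma=1.0, size=n_in + n_out)
    X.append(entity_features(n_in, n_out, vals))
    y.append(int(exchange))

clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X, y)
print("training accuracy on toy data:", clf.score(X, y))
```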

arXiv:1809.10315 [pdf, other] cs.LG cs.AI stat.ML
Smooth Inter-layer Propagation of Stabilized Neural Networks for Classification
Authors: Jingfeng Zhang, Laura Wynter
Abstract: Recent work has studied the reasons for the remarkable performance of deep neural networks in image classification. We examine batch normalization on the one hand and the dynamical systems view of residual networks on the other hand. Our goal is to understand the notions of stability and smoothness of the inter-layer propagation of ResNets so as to explain when they contribute to significantly enhanced performance. We postulate that such stability is of importance for the trained ResNet to transfer.
Submitted 28 September, 2018; v1 submitted 26 September, 2018; originally announced September 2018.
Comments: Revised Abstract

arXiv:1703.00759 [pdf, other] cs.CY
Real-time public transport service-level monitoring using passive WiFi: a spectral clustering approach for train timetable estimation
Authors: Baoyang Song, Laura Wynter
Abstract: A new area in which passive WiFi analytics have promise for delivering value is the real-time monitoring of public transport systems. One example is determining the true (as opposed to the published) timetable of a public transport system in real-time. In most cases, there are no other publicly-available sources for this information. Yet, it is indispensable for the real-time monitoring of public transport service levels. Furthermore, this information, if accurate and temporally fine-grained, can be used for very low-latency incident detection. In this work, we propose using spectral clustering based on trajectories derived from passive WiFi traces of users of a public transport system to infer the true timetable and two key performance indicators of the transport service, namely public transport vehicle headway and in-station dwell time. By detecting anomalous dwell times or headways, we demonstrate that a fast and accurate real-time incident-detection procedure can be obtained. The method we introduce makes use of the advantages of the high-frequency WiFi data, which provides very low-latency, universally-accessible information, while minimizing the impact of the noise in the data.
Submitted 2 March, 2017; originally announced March 2017.
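
A minimal sketch of the title's idea, under assumptions about the trace format: detections (time, station) from device trajectories are grouped by spectral clustering into train runs, from which headways follow. The synthetic traces, the feature scaling, and the headway computation below are not taken from the paper.

```python
# Recovering train runs from passive WiFi detections with spectral clustering,
# then estimating headway; the synthetic traces and scaling are assumptions.
import numpy as np
from sklearn.cluster import SpectralClustering

rng = np.random.default_rng(2)
n_trains, stations = 4, np.arange(6)
departures = 300.0 * np.arange(n_trains)          # one train every 5 minutes

points = []                                       # (time in minutes, station index)
for t0 in departures:
    for s in stations:
        t = t0 + 90.0 * s                         # 90 s of travel between stations
        for dt in rng.normal(0.0, 10.0, size=4):  # a few device sightings per stop
            points.append(((t + dt) / 60.0, float(s)))
X = np.array(points)

# The kNN affinity graph may be disconnected on this toy data; that is fine here.
labels = SpectralClustering(n_clusters=n_trains, affinity="nearest_neighbors",
                            n_neighbors=10, random_state=0).fit_predict(X)

# Estimated timetable: mean detection time of each recovered run at station 0.
starts = sorted(X[(labels == c) & (X[:, 1] == 0), 0].mean() for c in range(n_trains))
headways = np.diff(starts)
print("estimated headways (min):", np.round(headways, 1))   # expect roughly 5.0 each
```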
