Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>

  <!-- Favicons, touch icons and browser theme colours. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />

  <!--
    MathJax 2 configuration. Typesetting is restricted to elements whose class
    matches "mathjax.*" (everything else is ignored via ignoreClass '.*').
    Inline math is delimited by $...$ or \(...\); display math by $$...$$ or \[...\].
    processEscapes lets a backslash-escaped dollar sign render as a literal dollar.
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Loaded over an explicit https URL, like every other static asset on this page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" />
<p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
</div>
<div class="control">
  <div class="select is-small">
    <select name="searchtype" aria-label="Field to search">
      <option value="all" selected="selected">All fields</option>
      <option value="title">Title</option>
      <option value="author">Author</option>
      <option value="abstract">Abstract</option>
      <option value="comments">Comments</option>
      <option value="journal_ref">Journal reference</option>
      <option value="acm_class">ACM classification</option>
      <option value="msc_class">MSC classification</option>
      <option value="report_num">Report number</option>
      <option value="paper_id">arXiv identifier</option>
      <option value="doi">DOI</option>
      <option value="orcid">ORCID</option>
      <option value="author_id">arXiv author ID</option>
      <option value="help">Help pages</option>
      <option value="full_text">Full text</option>
    </select>
  </div>
</div>
<!-- Marks the request as coming from the header mini-search. -->
<input type="hidden" name="source" value="header">
<button class="button is-small is-cul-darker">Search</button>
</div>
</form>
</div>
</div>

<div class="container">
  <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
    <a href="https://arxiv.org/login">Login</a>
  </div>
</div>
</header>
<!-- Primary content; target of the "Skip to main content" link. -->
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1–38 of 38 results for author: <span class="mathjax">Lao, Y</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span>
    </div>
  </div>
  <div class="content">
    <!-- Archive-scoped search form. role="search" (not the non-existent "aria-role" attribute) exposes it as a search landmark. -->
    <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>.
<a href="/search/?searchtype=author&query=Lao%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Lao, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lao%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> 
<div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lao, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12071">arXiv:2411.12071</a> <span> [<a href="https://arxiv.org/pdf/2411.12071">pdf</a>, <a href="https://arxiv.org/ps/2411.12071">ps</a>, <a href="https://arxiv.org/format/2411.12071">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Theoretical Corrections and the Leveraging of Reinforcement Learning to Enhance Triangle Attack </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+N">Nicole Meng</a>, <a href="/search/cs?searchtype=author&query=Manicke%2C+C">Caleb Manicke</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">David Chen</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+C">Caiwen Ding</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+P">Pengyu Hong</a>, <a href="/search/cs?searchtype=author&query=Mahmood%2C+K">Kaleel Mahmood</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12071v1-abstract-short" style="display: inline;"> Adversarial examples represent a serious issue for the application of machine learning models in many sensitive domains. For generating adversarial examples, decision based black-box attacks are one of the most practical techniques as they only require query access to the model. One of the most recently proposed state-of-the-art decision based black-box attacks is Triangle Attack (TA). In this pap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12071v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12071v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12071v1-abstract-full" style="display: none;"> Adversarial examples represent a serious issue for the application of machine learning models in many sensitive domains. For generating adversarial examples, decision based black-box attacks are one of the most practical techniques as they only require query access to the model. One of the most recently proposed state-of-the-art decision based black-box attacks is Triangle Attack (TA). In this paper, we offer a high-level description of TA and explain potential theoretical limitations. We then propose a new decision based black-box attack, Triangle Attack with Reinforcement Learning (TARL). Our new attack addresses the limits of TA by leveraging reinforcement learning. This creates an attack that can achieve similar, if not better, attack accuracy than TA with half as many queries on state-of-the-art classifiers and defenses across ImageNet and CIFAR-10. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12071v1-abstract-full').style.display = 'none'; document.getElementById('2411.12071v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01989">arXiv:2409.01989</a> <span> [<a href="https://arxiv.org/pdf/2409.01989">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Improving Electrolyte Performance for Target Cathode Loading Using Interpretable Data-Driven Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+V">Vidushi Sharma</a>, <a href="/search/cs?searchtype=author&query=Tek%2C+A">Andy Tek</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+K">Khanh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Giammona%2C+M">Max Giammona</a>, <a href="/search/cs?searchtype=author&query=Zohair%2C+M">Murtaza Zohair</a>, <a href="/search/cs?searchtype=author&query=Sundberg%2C+L">Linda Sundberg</a>, <a href="/search/cs?searchtype=author&query=La%2C+Y">Young-Hye La</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2409.01989v1-abstract-short" style="display: inline;"> Higher loading of active electrode materials is desired in batteries, especially those based on conversion reactions, for enhanced energy density and cost efficiency. However, increasing active material loading in electrodes can cause significant performance depreciation due to internal resistance, shuttling, and parasitic side reactions, which can be alleviated to a certain extent by a compatible… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01989v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01989v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01989v1-abstract-full" style="display: none;"> Higher loading of active electrode materials is desired in batteries, especially those based on conversion reactions, for enhanced energy density and cost efficiency. However, increasing active material loading in electrodes can cause significant performance depreciation due to internal resistance, shuttling, and parasitic side reactions, which can be alleviated to a certain extent by a compatible design of electrolytes. In this work, a data-driven approach is leveraged to find a high-performing electrolyte formulation for a novel interhalogen battery custom to the target cathode loading. An electrolyte design consisting of 4 solvents and 4 salts is experimentally devised for a novel interhalogen battery based on a multi-electron redox reaction. The experimental dataset with variable electrolyte compositions and active cathode loading, is used to train a graph-based deep learning model mapping changing variables in the battery's material design to its specific capacity. 
The trained model is used to further optimize the electrolyte formulation compositions for enhancing the battery capacity at a target cathode loading by a two-fold approach: large-scale screening and interpreting electrolyte design principles for different cathode loadings. The data-driven approach is demonstrated to bring about an additional 20% increment in the specific capacity of the battery over capacities obtained from the experimental optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01989v1-abstract-full').style.display = 'none'; document.getElementById('2409.01989v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 Pages, 5 Figures, 2 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05409">arXiv:2408.05409</a> <span> [<a href="https://arxiv.org/pdf/2408.05409">pdf</a>, <a href="https://arxiv.org/format/2408.05409">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RSL-BA: Rolling Shutter Line Bundle Adjustment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongcong Zhang</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+B">Bangyan Liao</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yifei Xue</a>, <a 
href="/search/cs?searchtype=author&query=Lu%2C+C">Chen Lu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peidong Liu</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05409v1-abstract-short" style="display: inline;"> The line is a prevalent element in man-made environments, inherently encoding spatial structural information, thus making it a more robust choice for feature representation in practical applications. Despite its apparent advantages, previous rolling shutter bundle adjustment (RSBA) methods have only supported sparse feature points, which lack robustness, particularly in degenerate environments. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05409v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05409v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05409v1-abstract-full" style="display: none;"> The line is a prevalent element in man-made environments, inherently encoding spatial structural information, thus making it a more robust choice for feature representation in practical applications. Despite its apparent advantages, previous rolling shutter bundle adjustment (RSBA) methods have only supported sparse feature points, which lack robustness, particularly in degenerate environments. In this paper, we introduce the first rolling shutter line-based bundle adjustment solution, RSL-BA. Specifically, we initially establish the rolling shutter camera line projection theory utilizing Plücker line parameterization. Subsequently, we derive a series of reprojection error formulations which are stable and efficient. 
Finally, we theoretically and experimentally demonstrate that our method can prevent three common degeneracies, one of which is first discovered in this paper. Extensive synthetic and real data experiments demonstrate that our method achieves efficiency and accuracy comparable to existing point-based rolling shutter bundle adjustment solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05409v1-abstract-full').style.display = 'none'; document.getElementById('2408.05409v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18611">arXiv:2407.18611</a> <span> [<a href="https://arxiv.org/pdf/2407.18611">pdf</a>, <a href="https://arxiv.org/format/2407.18611">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> IOVS4NeRF:Incremental Optimal View Selection for Large-Scale NeRFs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jingpeng Xie</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+S">Shiyu Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuanlei Wang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18611v2-abstract-short" style="display: inline;"> Neural Radiance Fields (NeRF) have recently 
demonstrated significant efficiency in the reconstruction of three-dimensional scenes and the synthesis of novel perspectives from a limited set of two-dimensional images. However, large-scale reconstruction using NeRF requires a substantial amount of aerial imagery for training, making it impractical in resource-constrained environments. This paper intr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18611v2-abstract-full').style.display = 'inline'; document.getElementById('2407.18611v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18611v2-abstract-full" style="display: none;"> Neural Radiance Fields (NeRF) have recently demonstrated significant efficiency in the reconstruction of three-dimensional scenes and the synthesis of novel perspectives from a limited set of two-dimensional images. However, large-scale reconstruction using NeRF requires a substantial amount of aerial imagery for training, making it impractical in resource-constrained environments. This paper introduces an innovative incremental optimal view selection framework, IOVS4NeRF, designed to model a 3D scene within a restricted input budget. Specifically, our approach involves adding the existing training set with newly acquired samples, guided by a computed novel hybrid uncertainty of candidate views, which integrates rendering uncertainty and positional uncertainty. By selecting views that offer the highest information gain, the quality of novel view synthesis can be enhanced with minimal additional resources. Comprehensive experiments substantiate the efficiency of our model in realistic scenes, outperforming baselines and similar prior works, particularly under conditions of sparse training data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18611v2-abstract-full').style.display = 'none'; document.getElementById('2407.18611v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13803">arXiv:2407.13803</a> <span> [<a href="https://arxiv.org/pdf/2407.13803">pdf</a>, <a href="https://arxiv.org/format/2407.13803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Less is More: Sparse Watermarking in LLMs with Enhanced Text Quality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoang%2C+D+C">Duy C. Hoang</a>, <a href="/search/cs?searchtype=author&query=Le%2C+H+T+Q">Hung T. Q. Le</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+R">Rui Chu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weijie Zhao</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Doan%2C+K+D">Khoa D. 
Doan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13803v1-abstract-short" style="display: inline;"> With the widespread adoption of Large Language Models (LLMs), concerns about potential misuse have emerged. To this end, watermarking has been adapted to LLM, enabling a simple and effective way to detect and monitor generated text. However, while the existing methods can differentiate between watermarked and unwatermarked text with high accuracy, they often face a trade-off between the quality of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13803v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13803v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13803v1-abstract-full" style="display: none;"> With the widespread adoption of Large Language Models (LLMs), concerns about potential misuse have emerged. To this end, watermarking has been adapted to LLM, enabling a simple and effective way to detect and monitor generated text. However, while the existing methods can differentiate between watermarked and unwatermarked text with high accuracy, they often face a trade-off between the quality of the generated text and the effectiveness of the watermarking process. In this work, we present a novel type of LLM watermark, Sparse Watermark, which aims to mitigate this trade-off by applying watermarks to a small subset of generated tokens distributed across the text. The key strategy involves anchoring watermarked tokens to words that have specific Part-of-Speech (POS) tags. 
Our experimental results demonstrate that the proposed watermarking scheme achieves high detectability while generating text that outperforms previous LLM watermarking methods in quality across various tasks <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13803v1-abstract-full').style.display = 'none'; document.getElementById('2407.13803v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.12357">arXiv:2405.12357</a> <span> [<a href="https://arxiv.org/pdf/2405.12357">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Paired Conditional Generative Adversarial Network for Highly Accelerated Liver 4D MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+X">Xin Miao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hengjie Liu</a>, <a href="/search/cs?searchtype=author&query=Scholey%2C+J+E">Jessica E. 
Scholey</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wensha Yang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Mary Feng</a>, <a href="/search/cs?searchtype=author&query=Ohliger%2C+M">Michael Ohliger</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hui Lin</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yi Lao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Sheng%2C+K">Ke Sheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.12357v1-abstract-short" style="display: inline;"> Purpose: 4D MRI with high spatiotemporal resolution is desired for image-guided liver radiotherapy. Acquiring densely sampling k-space data is time-consuming. Accelerated acquisition with sparse samples is desirable but often causes degraded image quality or long reconstruction time. We propose the Reconstruct Paired Conditional Generative Adversarial Network (Re-Con-GAN) to shorten the 4D MRI rec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12357v1-abstract-full').style.display = 'inline'; document.getElementById('2405.12357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.12357v1-abstract-full" style="display: none;"> Purpose: 4D MRI with high spatiotemporal resolution is desired for image-guided liver radiotherapy. Acquiring densely sampling k-space data is time-consuming. Accelerated acquisition with sparse samples is desirable but often causes degraded image quality or long reconstruction time. We propose the Reconstruct Paired Conditional Generative Adversarial Network (Re-Con-GAN) to shorten the 4D MRI reconstruction time while maintaining the reconstruction quality. 
Methods: Patients who underwent free-breathing liver 4D MRI were included in the study. Fully- and retrospectively under-sampled data at 3, 6 and 10 times (3x, 6x and 10x) were first reconstructed using the nuFFT algorithm. Re-Con-GAN then trained input and output in pairs. Three types of networks, ResNet9, UNet and reconstruction swin transformer, were explored as generators. PatchGAN was selected as the discriminator. Re-Con-GAN processed the data (3D+t) as temporal slices (2D+t). A total of 48 patients with 12332 temporal slices were split into training (37 patients with 10721 slices) and test (11 patients with 1611 slices). Results: Re-Con-GAN consistently achieved comparable/better PSNR, SSIM, and RMSE scores compared to CS/UNet models. The inference time of Re-Con-GAN, UNet and CS are 0.15s, 0.16s, and 120s. The GTV detection task showed that Re-Con-GAN and CS, compared to UNet, better improved the dice score (3x Re-Con-GAN 80.98%; 3x CS 80.74%; 3x UNet 79.88%) of unprocessed under-sampled images (3x 69.61%). Conclusion: A generative network with adversarial training is proposed with promising and efficient reconstruction results demonstrated on an in-house dataset. The rapid and qualitative reconstruction of 4D liver MR has the potential to facilitate online adaptive MR-guided radiotherapy for liver cancer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12357v1-abstract-full').style.display = 'none'; document.getElementById('2405.12357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15530">arXiv:2403.15530</a> <span> [<a href="https://arxiv.org/pdf/2403.15530">pdf</a>, <a href="https://arxiv.org/format/2403.15530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pixel-GS: Density Control with Pixel-aware Gradient for 3D Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+W">Wenbo Hu</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=He%2C+T">Tong He</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15530v1-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3DGS) has demonstrated impressive novel view synthesis results while advancing real-time rendering performance. However, it relies heavily on the quality of the initial point cloud, resulting in blurring and needle-like artifacts in areas with insufficient initializing points. 
This is mainly attributed to the point cloud growth condition in 3DGS that only considers the avera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15530v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15530v1-abstract-full" style="display: none;"> 3D Gaussian Splatting (3DGS) has demonstrated impressive novel view synthesis results while advancing real-time rendering performance. However, it relies heavily on the quality of the initial point cloud, resulting in blurring and needle-like artifacts in areas with insufficient initializing points. This is mainly attributed to the point cloud growth condition in 3DGS that only considers the average gradient magnitude of points from observable views, thereby failing to grow for large Gaussians that are observable for many viewpoints while many of them are only covered in the boundaries. To this end, we propose a novel method, named Pixel-GS, to take into account the number of pixels covered by the Gaussian in each view during the computation of the growth condition. We regard the covered pixel numbers as the weights to dynamically average the gradients from different views, such that the growth of large Gaussians can be prompted. As a result, points within the areas with insufficient initializing points can be grown more effectively, leading to a more accurate and detailed reconstruction. In addition, we propose a simple yet effective strategy to scale the gradient field according to the distance to the camera, to suppress the growth of floaters near the camera. Extensive experiments both qualitatively and quantitatively demonstrate that our method achieves state-of-the-art rendering quality while maintaining real-time rendering speed, on the challenging Mip-NeRF 360 and Tanks & Temples datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15530v1-abstract-full').style.display = 'none'; document.getElementById('2403.15530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17483">arXiv:2402.17483</a> <span> [<a href="https://arxiv.org/pdf/2402.17483">pdf</a>, <a href="https://arxiv.org/format/2402.17483">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AlignMiF: Geometry-Aligned Multimodal Implicit Field for LiDAR-Camera Joint Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+T">Tao Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guangrun Wang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Peng Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+L">Liang Lin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+K">Kaicheng Yu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaodan Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17483v1-abstract-short" style="display: inline;"> Neural implicit fields have been a de facto standard in novel view synthesis. 
Recently, there exist some methods exploring fusing multiple modalities within a single field, aiming to share implicit features from different modalities to enhance reconstruction performance. However, these modalities often exhibit misaligned behaviors: optimizing for one modality, such as LiDAR, can adversely affect a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17483v1-abstract-full').style.display = 'inline'; document.getElementById('2402.17483v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17483v1-abstract-full" style="display: none;"> Neural implicit fields have been a de facto standard in novel view synthesis. Recently, there exist some methods exploring fusing multiple modalities within a single field, aiming to share implicit features from different modalities to enhance reconstruction performance. However, these modalities often exhibit misaligned behaviors: optimizing for one modality, such as LiDAR, can adversely affect another, like camera performance, and vice versa. In this work, we conduct comprehensive analyses on the multimodal implicit field of LiDAR-camera joint synthesis, revealing the underlying issue lies in the misalignment of different sensors. Furthermore, we introduce AlignMiF, a geometrically aligned multimodal implicit field with two proposed modules: Geometry-Aware Alignment (GAA) and Shared Geometry Initialization (SGI). These modules effectively align the coarse geometry across different modalities, significantly enhancing the fusion process between LiDAR and camera data. Through extensive experiments across various datasets and scenes, we demonstrate the effectiveness of our approach in facilitating better interaction between LiDAR and camera modalities within a unified neural field. 
Specifically, our proposed AlignMiF achieves remarkable improvement over recent implicit fusion methods (+2.01 and +3.11 image PSNR on the KITTI-360 and Waymo datasets) and consistently surpasses single modality performance (13.8% and 14.2% reduction in LiDAR Chamfer Distance on the respective datasets). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17483v1-abstract-full').style.display = 'none'; document.getElementById('2402.17483v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04554">arXiv:2402.04554</a> <span> [<a href="https://arxiv.org/pdf/2402.04554">pdf</a>, <a href="https://arxiv.org/format/2402.04554">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BirdNeRF: Fast Neural Reconstruction of Large-Scale Scenes From Aerial Imagery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huiqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yifei Xue</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+M">Ming Liao</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2402.04554v2-abstract-short" style="display: inline;"> In this study, we introduce BirdNeRF, an adaptation of Neural Radiance Fields (NeRF) designed specifically for reconstructing large-scale scenes using aerial imagery. Unlike previous research focused on small-scale and object-centric NeRF reconstruction, our approach addresses multiple challenges, including (1) Addressing the issue of slow training and rendering associated with large models. (2) M… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04554v2-abstract-full').style.display = 'inline'; document.getElementById('2402.04554v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04554v2-abstract-full" style="display: none;"> In this study, we introduce BirdNeRF, an adaptation of Neural Radiance Fields (NeRF) designed specifically for reconstructing large-scale scenes using aerial imagery. Unlike previous research focused on small-scale and object-centric NeRF reconstruction, our approach addresses multiple challenges, including (1) Addressing the issue of slow training and rendering associated with large models. (2) Meeting the computational demands necessitated by modeling a substantial number of images, requiring extensive resources such as high-performance GPUs. (3) Overcoming significant artifacts and low visual fidelity commonly observed in large-scale reconstruction tasks due to limited model capacity. Specifically, we present a novel bird-view pose-based spatial decomposition algorithm that decomposes a large aerial image set into multiple small sets with appropriately sized overlaps, allowing us to train individual NeRFs of sub-scene. This decomposition approach not only decouples rendering time from the scene size but also enables rendering to scale seamlessly to arbitrarily large environments. 
Moreover, it allows for per-block updates of the environment, enhancing the flexibility and adaptability of the reconstruction process. Additionally, we propose a projection-guided novel view re-rendering strategy, which aids in effectively utilizing the independently trained sub-scenes to generate superior rendering results. We evaluate our approach on existing datasets as well as against our own drone footage, improving reconstruction speed by 10x over classical photogrammetry software and 50x over state-of-the-art large-scale NeRF solution, on a single GPU with similar rendering quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04554v2-abstract-full').style.display = 'none'; document.getElementById('2402.04554v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09126">arXiv:2401.09126</a> <span> [<a href="https://arxiv.org/pdf/2401.09126">pdf</a>, <a href="https://arxiv.org/format/2401.09126">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Objects With Lighting: A Real-World Dataset for Evaluating Reconstruction and Rendering for Object Relighting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ummenhofer%2C+B">Benjamin Ummenhofer</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+S">Sanskar Agrawal</a>, <a href="/search/cs?searchtype=author&query=Sepulveda%2C+R">Rene Sepulveda</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kai Zhang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+T">Tianhang Cheng</a>, <a href="/search/cs?searchtype=author&query=Richter%2C+S">Stephan Richter</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shenlong Wang</a>, <a href="/search/cs?searchtype=author&query=Ros%2C+G">German Ros</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09126v2-abstract-short" style="display: inline;"> Reconstructing an object from photos and placing it virtually in a new environment goes beyond the standard novel view synthesis task as the appearance of the object has to not only adapt to the novel viewpoint but also to the new lighting conditions and yet evaluations of inverse rendering methods rely on novel 
view synthesis data or simplistic synthetic datasets for quantitative analysis. This w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09126v2-abstract-full').style.display = 'inline'; document.getElementById('2401.09126v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09126v2-abstract-full" style="display: none;"> Reconstructing an object from photos and placing it virtually in a new environment goes beyond the standard novel view synthesis task as the appearance of the object has to not only adapt to the novel viewpoint but also to the new lighting conditions and yet evaluations of inverse rendering methods rely on novel view synthesis data or simplistic synthetic datasets for quantitative analysis. This work presents a real-world dataset for measuring the reconstruction and rendering of objects for relighting. To this end, we capture the environment lighting and ground truth images of the same objects in multiple environments allowing to reconstruct the objects from images taken in one environment and quantify the quality of the rendered views for the unseen lighting environments. Further, we introduce a simple baseline composed of off-the-shelf methods and test several state-of-the-art methods on the relighting task and show that novel view synthesis is not a reliable proxy to measure performance. Code and dataset are available at https://github.com/isl-org/objects-with-lighting . 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09126v2-abstract-full').style.display = 'none'; document.getElementById('2401.09126v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at 3DV 2024, Oral presentation. For the project page see https://github.com/isl-org/objects-with-lighting</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03844">arXiv:2401.03844</a> <span> [<a href="https://arxiv.org/pdf/2401.03844">pdf</a>, <a href="https://arxiv.org/format/2401.03844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fully Attentional Networks with Self-emerging Token Labeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bingyin Zhao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhiding Yu</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+S">Shiyi Lan</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yutao Cheng</a>, <a href="/search/cs?searchtype=author&query=Anandkumar%2C+A">Anima Anandkumar</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Alvarez%2C+J+M">Jose M. 
Alvarez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03844v1-abstract-short" style="display: inline;"> Recent studies indicate that Vision Transformers (ViTs) are robust against out-of-distribution scenarios. In particular, the Fully Attentional Network (FAN) - a family of ViT backbones, has achieved state-of-the-art robustness. In this paper, we revisit the FAN models and improve their pre-training with a self-emerging token labeling (STL) framework. Our method contains a two-stage training framew… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03844v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03844v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03844v1-abstract-full" style="display: none;"> Recent studies indicate that Vision Transformers (ViTs) are robust against out-of-distribution scenarios. In particular, the Fully Attentional Network (FAN) - a family of ViT backbones, has achieved state-of-the-art robustness. In this paper, we revisit the FAN models and improve their pre-training with a self-emerging token labeling (STL) framework. Our method contains a two-stage training framework. Specifically, we first train a FAN token labeler (FAN-TL) to generate semantically meaningful patch token labels, followed by a FAN student model training stage that uses both the token labels and the original class label. With the proposed STL framework, our best model based on FAN-L-Hybrid (77.3M parameters) achieves 84.8% Top-1 accuracy and 42.1% mCE on ImageNet-1K and ImageNet-C, and sets a new state-of-the-art for ImageNet-A (46.1%) and ImageNet-R (56.6%) without using extra data, outperforming the original FAN counterpart by significant margins. 
The proposed framework also demonstrates significantly enhanced performance on downstream tasks such as semantic segmentation, with up to 1.7% improvement in robustness over the counterpart model. Code is available at https://github.com/NVlabs/STL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03844v1-abstract-full').style.display = 'none'; document.getElementById('2401.03844v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2023, pp. 5585-5595 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10657">arXiv:2312.10657</a> <span> [<a href="https://arxiv.org/pdf/2312.10657">pdf</a>, <a href="https://arxiv.org/format/2312.10657">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> UltraClean: A Simple Framework to Train Robust Neural Networks against Backdoor Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bingyin Zhao</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10657v1-abstract-short" style="display: inline;"> Backdoor attacks are emerging threats to deep neural networks, which 
typically embed malicious behaviors into a victim model by injecting poisoned samples. Adversaries can activate the injected backdoor during inference by presenting the trigger on input images. Prior defensive methods have achieved remarkable success in countering dirty-label backdoor attacks where the labels of poisoned samples… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10657v1-abstract-full').style.display = 'inline'; document.getElementById('2312.10657v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10657v1-abstract-full" style="display: none;"> Backdoor attacks are emerging threats to deep neural networks, which typically embed malicious behaviors into a victim model by injecting poisoned samples. Adversaries can activate the injected backdoor during inference by presenting the trigger on input images. Prior defensive methods have achieved remarkable success in countering dirty-label backdoor attacks where the labels of poisoned samples are often mislabeled. However, these approaches do not work for a recent new type of backdoor -- clean-label backdoor attacks that imperceptibly modify poisoned data and hold consistent labels. More complex and powerful algorithms are demanded to defend against such stealthy attacks. In this paper, we propose UltraClean, a general framework that simplifies the identification of poisoned samples and defends against both dirty-label and clean-label backdoor attacks. Given the fact that backdoor triggers introduce adversarial noise that intensifies in feed-forward propagation, UltraClean first generates two variants of training samples using off-the-shelf denoising functions. It then measures the susceptibility of training samples leveraging the error amplification effect in DNNs, which dilates the noise difference between the original image and denoised variants. 
Lastly, it filters out poisoned samples based on the susceptibility to thwart the backdoor implantation. Despite its simplicity, UltraClean achieves a superior detection rate across various datasets and significantly reduces the backdoor attack success rate while maintaining a decent model accuracy on clean data, outperforming existing defensive methods by a large margin. Code is available at https://github.com/bxz9200/UltraClean. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10657v1-abstract-full').style.display = 'none'; document.getElementById('2312.10657v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06642">arXiv:2312.06642</a> <span> [<a href="https://arxiv.org/pdf/2312.06642">pdf</a>, <a href="https://arxiv.org/format/2312.06642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CorresNeRF: Image Correspondence Priors for Neural Radiance Fields </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaogang Xu</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zhipeng Cai</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xihui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2312.06642v1-abstract-short" style="display: inline;"> Neural Radiance Fields (NeRFs) have achieved impressive results in novel view synthesis and surface reconstruction tasks. However, their performance suffers under challenging scenarios with sparse input views. We present CorresNeRF, a novel method that leverages image correspondence priors computed by off-the-shelf methods to supervise NeRF training. We design adaptive processes for augmentation a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06642v1-abstract-full').style.display = 'inline'; document.getElementById('2312.06642v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06642v1-abstract-full" style="display: none;"> Neural Radiance Fields (NeRFs) have achieved impressive results in novel view synthesis and surface reconstruction tasks. However, their performance suffers under challenging scenarios with sparse input views. We present CorresNeRF, a novel method that leverages image correspondence priors computed by off-the-shelf methods to supervise NeRF training. We design adaptive processes for augmentation and filtering to generate dense and high-quality correspondences. The correspondences are then used to regularize NeRF training via the correspondence pixel reprojection and depth loss terms. We evaluate our methods on novel view synthesis and surface reconstruction tasks with density-based and SDF-based NeRF models on different datasets. Our method outperforms previous methods in both photometric and geometric metrics. We show that this simple yet effective technique of using correspondence priors can be applied as a plug-and-play module across different NeRF variants. The project page is at https://yxlao.github.io/corres-nerf. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06642v1-abstract-full').style.display = 'none'; document.getElementById('2312.06642v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NeurIPS 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04618">arXiv:2310.04618</a> <span> [<a href="https://arxiv.org/pdf/2310.04618">pdf</a>, <a href="https://arxiv.org/format/2310.04618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICCAD57390.2023.10323839">10.1109/ICCAD57390.2023.10323839 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> KyberMat: Efficient Accelerator for Matrix-Vector Polynomial Multiplication in CRYSTALS-Kyber Scheme via NTT and Polyphase Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+W">Weihang Tan</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Parhi%2C+K+K">Keshab K. 
Parhi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04618v1-abstract-short" style="display: inline;"> CRYSTALS-Kyber (Kyber) is one of the post-quantum cryptography (PQC) key-encapsulation mechanism (KEM) schemes selected during the standardization process. This paper addresses optimization for Kyber architecture with respect to latency and throughput constraints. Specifically, matrix-vector multiplication and number theoretic transform (NTT)-based polynomial multiplication are critical operations… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04618v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04618v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04618v1-abstract-full" style="display: none;"> CRYSTALS-Kyber (Kyber) is one of the post-quantum cryptography (PQC) key-encapsulation mechanism (KEM) schemes selected during the standardization process. This paper addresses optimization for Kyber architecture with respect to latency and throughput constraints. Specifically, matrix-vector multiplication and number theoretic transform (NTT)-based polynomial multiplication are critical operations and bottlenecks that require optimization. To address this challenge, we propose an algorithm and hardware co-design approach to systematically optimize matrix-vector multiplication and NTT-based polynomial multiplication by employing a novel sub-structure sharing technique in order to reduce computational complexity, i.e., the number of modular multiplications and modular additions/subtractions consumed. The sub-structure sharing approach is inspired by prior fast parallel approaches based on polyphase decomposition. 
The proposed efficient feed-forward architecture achieves high speed, low latency, and full utilization of all hardware components, which can significantly enhance the overall efficiency of the Kyber scheme. The FPGA implementation results show that our proposed design, using the fast two-parallel structure, leads to an approximate reduction of 90% in execution time, along with a 66 times improvement in throughput performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04618v1-abstract-full').style.display = 'none'; document.getElementById('2310.04618v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proc. 2023 IEEE/ACM International Conference on Computer Aided Design (ICCAD), San Francisco, CA, Oct. 29 - Nov. 
2, 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2023 IEEE/ACM International Conference on Computer Aided Design (ICCAD) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.00567">arXiv:2310.00567</a> <span> [<a href="https://arxiv.org/pdf/2310.00567">pdf</a>, <a href="https://arxiv.org/format/2310.00567">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Understanding the Robustness of Randomized Feature Defense Against Query-Based Adversarial Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+H">Quang H. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T">Tung Pham</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+K">Kok-Seng Wong</a>, <a href="/search/cs?searchtype=author&query=Doan%2C+K+D">Khoa D. Doan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.00567v1-abstract-short" style="display: inline;"> Recent works have shown that deep neural networks are vulnerable to adversarial examples that find samples close to the original image but can make the model misclassify. Even with access only to the model's output, an attacker can employ black-box attacks to generate such adversarial examples. 
In this work, we propose a simple and lightweight defense against black-box attacks by adding random noi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00567v1-abstract-full').style.display = 'inline'; document.getElementById('2310.00567v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.00567v1-abstract-full" style="display: none;"> Recent works have shown that deep neural networks are vulnerable to adversarial examples that find samples close to the original image but can make the model misclassify. Even with access only to the model's output, an attacker can employ black-box attacks to generate such adversarial examples. In this work, we propose a simple and lightweight defense against black-box attacks by adding random noise to hidden features at intermediate layers of the model at inference time. Our theoretical analysis confirms that this method effectively enhances the model's resilience against both score-based and decision-based black-box attacks. Importantly, our defense does not necessitate adversarial training and has minimal impact on accuracy, rendering it applicable to any pre-trained model. Our analysis also reveals the significance of selectively adding noise to different parts of the model based on the gradient of the adversarial objective function, which can be varied during the attack. We demonstrate the robustness of our defense against multiple black-box attacks through extensive empirical experiments involving diverse models with various architectures. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00567v1-abstract-full').style.display = 'none'; document.getElementById('2310.00567v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05129">arXiv:2307.05129</a> <span> [<a href="https://arxiv.org/pdf/2307.05129">pdf</a>, <a href="https://arxiv.org/format/2307.05129">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DFR: Depth from Rotation by Uncalibrated Image Rectification with Latitudinal Motion Assumption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongcong Zhang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yifei Xue</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+M">Ming Liao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huiqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05129v1-abstract-short" style="display: inline;"> Despite the increasing prevalence of rotating-style capture (e.g., surveillance cameras), conventional stereo rectification techniques frequently fail due to the rotation-dominant motion and small baseline between views. 
In this paper, we tackle the challenge of performing stereo rectification for uncalibrated rotating cameras. To that end, we propose Depth-from-Rotation (DfR), a novel image recti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05129v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05129v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05129v1-abstract-full" style="display: none;"> Despite the increasing prevalence of rotating-style capture (e.g., surveillance cameras), conventional stereo rectification techniques frequently fail due to the rotation-dominant motion and small baseline between views. In this paper, we tackle the challenge of performing stereo rectification for uncalibrated rotating cameras. To that end, we propose Depth-from-Rotation (DfR), a novel image rectification solution that analytically rectifies two images with two-point correspondences and serves for further depth estimation. Specifically, we model the motion of a rotating camera as the camera rotates on a sphere with fixed latitude. The camera's optical axis lies perpendicular to the sphere's surface. We call this latitudinal motion assumption. Then we derive a 2-point analytical solver from directly computing the rectified transformations on the two images. We also present a self-adaptive strategy to reduce the geometric distortion after rectification. Extensive synthetic and real data experiments demonstrate that the proposed method outperforms existing works in effectiveness and efficiency by a significant margin. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05129v1-abstract-full').style.display = 'none'; document.getElementById('2307.05129v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.03811">arXiv:2307.03811</a> <span> [<a href="https://arxiv.org/pdf/2307.03811">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1021/acs.jcim.3c01030">10.1021/acs.jcim.3c01030 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Formulation Graphs for Mapping Structure-Composition of Battery Electrolytes to Device Performance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+V">Vidushi Sharma</a>, <a href="/search/cs?searchtype=author&query=Giammona%2C+M">Maxwell Giammona</a>, <a href="/search/cs?searchtype=author&query=Zubarev%2C+D">Dmitry Zubarev</a>, <a href="/search/cs?searchtype=author&query=Tek%2C+A">Andy Tek</a>, <a href="/search/cs?searchtype=author&query=Nugyuen%2C+K">Khanh 
Nugyuen</a>, <a href="/search/cs?searchtype=author&query=Sundberg%2C+L">Linda Sundberg</a>, <a href="/search/cs?searchtype=author&query=Congiu%2C+D">Daniele Congiu</a>, <a href="/search/cs?searchtype=author&query=La%2C+Y">Young-Hye La</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.03811v3-abstract-short" style="display: inline;"> Advanced computational methods are being actively sought for addressing the challenges associated with discovery and development of new combinatorial material such as formulations. A widely adopted approach involves domain informed high-throughput screening of individual components that can be combined into a formulation. This manages to accelerate the discovery of new compounds for a target appli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03811v3-abstract-full').style.display = 'inline'; document.getElementById('2307.03811v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.03811v3-abstract-full" style="display: none;"> Advanced computational methods are being actively sought for addressing the challenges associated with discovery and development of new combinatorial material such as formulations. A widely adopted approach involves domain informed high-throughput screening of individual components that can be combined into a formulation. This manages to accelerate the discovery of new compounds for a target application but still leaves the process of identifying the right 'formulation' from the shortlisted chemical space largely a laboratory experiment-driven process. We report a deep learning model, Formulation Graph Convolution Network (F-GCN), that can map structure-composition relationship of the individual components to the property of the liquid formulation as a whole. 
Multiple GCNs are assembled in parallel that featurize formulation constituents domain-intuitively on the fly. The resulting molecular descriptors are scaled based on respective constituent's molar percentage in the formulation, followed by formalizing into a combined descriptor that represents a complete formulation to an external learning architecture. The use case of proposed formulation learning model is demonstrated for battery electrolytes by training and testing it on two exemplary datasets representing electrolyte formulations vs battery performance -- one dataset is sourced from literature about Li/Cu half-cells, while the other is obtained by lab-experiments related to lithium-iodide full-cell chemistry. The model is shown to predict the performance metrics like Coulombic Efficiency (CE) and specific capacity of new electrolyte formulations with lowest reported errors. The best performing F-GCN model uses molecular descriptors derived from molecular graphs that are informed with HOMO-LUMO and electric moment properties of the molecules using a knowledge transfer technique. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03811v3-abstract-full').style.display = 'none'; document.getElementById('2307.03811v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.10406">arXiv:2304.10406</a> <span> [<a href="https://arxiv.org/pdf/2304.10406">pdf</a>, <a href="https://arxiv.org/format/2304.10406">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LiDAR-NeRF: Novel LiDAR View Synthesis via Neural Radiance Fields </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tao%2C+T">Tang Tao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+L">Longfei Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guangrun Wang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Peng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+D">Dayang Hao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaodan Liang</a>, <a href="/search/cs?searchtype=author&query=Salzmann%2C+M">Mathieu Salzmann</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+K">Kaicheng Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.10406v2-abstract-short" style="display: inline;"> We introduce a new task, novel view synthesis for LiDAR sensors. 
While traditional model-based LiDAR simulators with style-transfer neural networks can be applied to render novel views, they fall short of producing accurate and realistic LiDAR patterns because the renderers rely on explicit 3D reconstruction and exploit game engines, that ignore important attributes of LiDAR points. We address thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.10406v2-abstract-full').style.display = 'inline'; document.getElementById('2304.10406v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.10406v2-abstract-full" style="display: none;"> We introduce a new task, novel view synthesis for LiDAR sensors. While traditional model-based LiDAR simulators with style-transfer neural networks can be applied to render novel views, they fall short of producing accurate and realistic LiDAR patterns because the renderers rely on explicit 3D reconstruction and exploit game engines, that ignore important attributes of LiDAR points. We address this challenge by formulating, to the best of our knowledge, the first differentiable end-to-end LiDAR rendering framework, LiDAR-NeRF, leveraging a neural radiance field (NeRF) to facilitate the joint learning of geometry and the attributes of 3D points. However, simply employing NeRF cannot achieve satisfactory results, as it only focuses on learning individual pixels while ignoring local information, especially at low texture areas, resulting in poor geometry. To this end, we have taken steps to address this issue by introducing a structural regularization method to preserve local structural details. To evaluate the effectiveness of our approach, we establish an object-centric multi-view LiDAR dataset, dubbed NeRF-MVL. It contains observations of objects from 9 categories seen from 360-degree viewpoints captured with multiple LiDAR sensors. 
Our extensive experiments on the scene-level KITTI-360 dataset, and on our object-level NeRF-MVL show that our LiDAR-NeRF surpasses the model-based algorithms significantly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.10406v2-abstract-full').style.display = 'none'; document.getElementById('2304.10406v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper introduces a new task of novel LiDAR view synthesis, and proposes a differentiable framework called LiDAR-NeRF with a structural regularization, as well as an object-centric multi-view LiDAR dataset called NeRF-MVL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.09783">arXiv:2304.09783</a> <span> [<a href="https://arxiv.org/pdf/2304.09783">pdf</a>, <a href="https://arxiv.org/format/2304.09783">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Application of attention-based Siamese composite neural network in medical image recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zihao Huang</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Xin%2C+W">Weixing Xin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xingtong Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Huizhen Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haowen Chen</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xia Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.09783v3-abstract-short" style="display: inline;"> Medical image recognition often faces the problem of insufficient data in practical applications. Image recognition and processing under few-shot conditions will produce overfitting, low recognition accuracy, low reliability and insufficient robustness. It is often the case that the difference of characteristics is subtle, and the recognition is affected by perspectives, background, occlusion and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.09783v3-abstract-full').style.display = 'inline'; document.getElementById('2304.09783v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.09783v3-abstract-full" style="display: none;"> Medical image recognition often faces the problem of insufficient data in practical applications. Image recognition and processing under few-shot conditions will produce overfitting, low recognition accuracy, low reliability and insufficient robustness. It is often the case that the difference of characteristics is subtle, and the recognition is affected by perspectives, background, occlusion and other factors, which increases the difficulty of recognition. 
Furthermore, in fine-grained images, the few-shot problem leads to insufficient useful feature information in the images. Considering the characteristics of few-shot and fine-grained image recognition, this study has established a recognition model based on attention and Siamese neural network. Aiming at the problem of few-shot samples, a Siamese neural network suitable for classification model is proposed. The Attention-Based neural network is used as the main network to improve the classification effect. COVID-19 lung samples have been selected for testing the model. The results show that the fewer image samples there are, the more pronounced the model's advantage over an ordinary neural network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.09783v3-abstract-full').style.display = 'none'; document.getElementById('2304.09783v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.18125">arXiv:2303.18125</a> <span> [<a href="https://arxiv.org/pdf/2303.18125">pdf</a>, <a href="https://arxiv.org/format/2303.18125">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Nonlinear-Motion-Aware and Occlusion-Robust Rolling Shutter Correction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qu%2C+D">Delin Qu</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhigang Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bin Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuelong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.18125v3-abstract-short" style="display: inline;"> This paper addresses the problem of rolling shutter correction in complex nonlinear and dynamic scenes with extreme occlusion. Existing methods suffer from two main drawbacks. Firstly, they face challenges in estimating the accurate correction field due to the uniform velocity assumption, leading to significant image correction errors under complex motion. 
Secondly, the drastic occlusion in dynami… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.18125v3-abstract-full').style.display = 'inline'; document.getElementById('2303.18125v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.18125v3-abstract-full" style="display: none;"> This paper addresses the problem of rolling shutter correction in complex nonlinear and dynamic scenes with extreme occlusion. Existing methods suffer from two main drawbacks. Firstly, they face challenges in estimating the accurate correction field due to the uniform velocity assumption, leading to significant image correction errors under complex motion. Secondly, the drastic occlusion in dynamic scenes prevents current solutions from achieving better image quality because of the inherent difficulties in aligning and aggregating multiple frames. To tackle these challenges, we model the curvilinear trajectory of pixels analytically and propose a geometry-based Quadratic Rolling Shutter (QRS) motion solver, which precisely estimates the high-order correction field of individual pixels. Besides, to reconstruct high-quality occlusion frames in dynamic scenes, we present a 3D video architecture that effectively Aligns and Aggregates multi-frame context, namely, RSA2-Net. We evaluate our method across a broad range of cameras and video sequences, demonstrating its significant superiority. Specifically, our method surpasses the state-of-the-art by +4.98, +0.77, and +4.33 of PSNR on Carla-RS, Fastec-RS, and BS-RSC datasets, respectively. Code is available at https://github.com/DelinQu/qrsc. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.18125v3-abstract-full').style.display = 'none'; document.getElementById('2303.18125v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted at ICCV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.02237">arXiv:2303.02237</a> <span> [<a href="https://arxiv.org/pdf/2303.02237">pdf</a>, <a href="https://arxiv.org/format/2303.02237">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIFS.2023.3338553">10.1109/TIFS.2023.3338553 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PaReNTT: Low-Latency Parallel Residue Number System and NTT-Based Long Polynomial Modular Multiplication for Homomorphic Encryption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+W">Weihang Tan</a>, <a href="/search/cs?searchtype=author&query=Chiu%2C+S">Sin-Wei 
Chiu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+A">Antian Wang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Parhi%2C+K+K">Keshab K. Parhi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.02237v2-abstract-short" style="display: inline;"> High-speed long polynomial multiplication is important for applications in homomorphic encryption (HE) and lattice-based cryptosystems. This paper addresses low-latency hardware architectures for long polynomial modular multiplication using the number-theoretic transform (NTT) and inverse NTT (iNTT). Chinese remainder theorem (CRT) is used to decompose the modulus into multiple smaller moduli. Our… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.02237v2-abstract-full').style.display = 'inline'; document.getElementById('2303.02237v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.02237v2-abstract-full" style="display: none;"> High-speed long polynomial multiplication is important for applications in homomorphic encryption (HE) and lattice-based cryptosystems. This paper addresses low-latency hardware architectures for long polynomial modular multiplication using the number-theoretic transform (NTT) and inverse NTT (iNTT). Chinese remainder theorem (CRT) is used to decompose the modulus into multiple smaller moduli. Our proposed architecture, namely PaReNTT, makes four novel contributions. First, parallel NTT and iNTT architectures are proposed to reduce the number of clock cycles to process the polynomials. This can enable real-time processing for HE applications, as the number of clock cycles to process the polynomial is inversely proportional to the level of parallelism. 
Second, the proposed architecture eliminates the need for permuting the NTT outputs before their product is input to the iNTT. This reduces latency by n/4 clock cycles, where n is the length of the polynomial, and reduces buffer requirement by one delay-switch-delay circuit of size n. Third, an approach to select special moduli is presented where the moduli can be expressed in terms of a few signed power-of-two terms. Fourth, novel architectures for pre-processing for computing residual polynomials using the CRT and post-processing for combining the residual polynomials are proposed. These architectures significantly reduce the area consumption of the pre-processing and post-processing steps. The proposed long modular polynomial multiplications are ideal for applications that require low latency and high sample rate as these feed-forward architectures can be pipelined at arbitrary levels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.02237v2-abstract-full').style.display = 'none'; document.getElementById('2303.02237v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Information Forensics and Security, Vol. 19, pp. 
1646-1659, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08505">arXiv:2302.08505</a> <span> [<a href="https://arxiv.org/pdf/2302.08505">pdf</a>, <a href="https://arxiv.org/format/2302.08505">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rapid-Motion-Track: Markerless Tracking of Fast Human Motion with Deeper Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+R">Renjie Li</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+C+Y">Chun Yu Lao</a>, <a href="/search/cs?searchtype=author&query=George%2C+R+S">Rebecca St. George</a>, <a href="/search/cs?searchtype=author&query=Lawler%2C+K">Katherine Lawler</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Saurabh Garg</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+S+N">Son N. Tran</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Q">Quan Bai</a>, <a href="/search/cs?searchtype=author&query=Alty%2C+J">Jane Alty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08505v1-abstract-short" style="display: inline;"> Objective The coordination of human movement directly reflects function of the central nervous system. Small deficits in movement are often the first sign of an underlying neurological problem. 
The objective of this research is to develop a new end-to-end, deep learning-based system, Rapid-Motion-Track (RMT) that can track the fastest human movement accurately when webcams or laptop cameras are us… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08505v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08505v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08505v1-abstract-full" style="display: none;"> Objective The coordination of human movement directly reflects function of the central nervous system. Small deficits in movement are often the first sign of an underlying neurological problem. The objective of this research is to develop a new end-to-end, deep learning-based system, Rapid-Motion-Track (RMT) that can track the fastest human movement accurately when webcams or laptop cameras are used. Materials and Methods We applied RMT to finger tapping, a well-validated test of motor control that is one of the most challenging human motions to track with computer vision due to the small keypoints of digits and the high velocities that are generated. We recorded 160 finger tapping assessments simultaneously with a standard 2D laptop camera (30 frames/sec) and a high-speed wearable sensor-based 3D motion tracking system (250 frames/sec). RMT and a range of DLC models were applied to the video data with tapping frequencies up to 8Hz to extract movement features. Results The movement features (e.g. speed, rhythm, variance) identified with the new RMT system exhibited very high concurrent validity with the gold-standard measurements (97.3% of RMT measures were within +/-0.5Hz of the Optotrak measures), and outperformed DLC and other advanced computer vision tools (around 88.2% of DLC measures were within +/-0.5Hz of the Optotrak measures).
RMT also accurately tracked a range of other rapid human movements such as foot tapping, head turning and sit-to-stand movements. Conclusion: With the ubiquity of video technology in smart devices, the RMT method holds potential to transform access and accuracy of human movement assessment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08505v1-abstract-full').style.display = 'none'; document.getElementById('2302.08505v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.09194">arXiv:2210.09194</a> <span> [<a href="https://arxiv.org/pdf/2210.09194">pdf</a>, <a href="https://arxiv.org/format/2210.09194">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Marksman Backdoor: Backdoor Attacks with Arbitrary Target Class </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Doan%2C+K+D">Khoa D.
Doan</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.09194v1-abstract-short" style="display: inline;"> In recent years, machine learning models have been shown to be vulnerable to backdoor attacks. Under such attacks, an adversary embeds a stealthy backdoor into the trained model such that the compromised models will behave normally on clean inputs but will misclassify according to the adversary's control on maliciously constructed input with a trigger. While these existing attacks are very effecti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09194v1-abstract-full').style.display = 'inline'; document.getElementById('2210.09194v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.09194v1-abstract-full" style="display: none;"> In recent years, machine learning models have been shown to be vulnerable to backdoor attacks. Under such attacks, an adversary embeds a stealthy backdoor into the trained model such that the compromised models will behave normally on clean inputs but will misclassify according to the adversary's control on maliciously constructed input with a trigger. While these existing attacks are very effective, the adversary's capability is limited: given an input, these attacks can only cause the model to misclassify toward a single pre-defined or target class. In contrast, this paper exploits a novel backdoor attack with a much more powerful payload, denoted as Marksman, where the adversary can arbitrarily choose which target class the model will misclassify given any input during inference. 
To achieve this goal, we propose to represent the trigger function as a class-conditional generative model and to inject the backdoor in a constrained optimization framework, where the trigger function learns to generate an optimal trigger pattern to attack any target class at will while simultaneously embedding this generative backdoor into the trained model. Given the learned trigger-generation function, during inference, the adversary can specify an arbitrary backdoor attack target class, and an appropriate trigger causing the model to classify toward this target class is created accordingly. We show empirically that the proposed framework achieves high attack performance while preserving the clean-data performance in several benchmark datasets, including MNIST, CIFAR10, GTSRB, and TinyImageNet. The proposed Marksman backdoor attack can also easily bypass existing backdoor defenses that were originally designed against backdoor attacks with a single target class. Our work takes another significant step toward understanding the extensive risks of backdoor attacks in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09194v1-abstract-full').style.display = 'none'; document.getElementById('2210.09194v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.05666">arXiv:2210.05666</a> <span> [<a href="https://arxiv.org/pdf/2210.05666">pdf</a>, <a href="https://arxiv.org/format/2210.05666">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Point Transformer V2: Grouped Vector Attention and Partition-based Pooling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaoyang Wu</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Li Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xihui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.05666v2-abstract-short" style="display: inline;"> As a pioneering work exploring transformer architecture for 3D point cloud understanding, Point Transformer achieves impressive results on multiple highly competitive benchmarks. In this work, we analyze the limitations of the Point Transformer and propose our powerful and efficient Point Transformer V2 model with novel designs that overcome the limitations of previous work. 
In particular, we firs… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05666v2-abstract-full').style.display = 'inline'; document.getElementById('2210.05666v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.05666v2-abstract-full" style="display: none;"> As a pioneering work exploring transformer architecture for 3D point cloud understanding, Point Transformer achieves impressive results on multiple highly competitive benchmarks. In this work, we analyze the limitations of the Point Transformer and propose our powerful and efficient Point Transformer V2 model with novel designs that overcome the limitations of previous work. In particular, we first propose group vector attention, which is more effective than the previous version of vector attention. Inheriting the advantages of both learnable weight encoding and multi-head attention, we present a highly effective implementation of grouped vector attention with a novel grouped weight encoding layer. We also strengthen the position information for attention by an additional position encoding multiplier. Furthermore, we design novel and lightweight partition-based pooling methods which enable better spatial alignment and more efficient sampling. Extensive experiments show that our model achieves better performance than its predecessor and achieves state-of-the-art on several challenging 3D point cloud understanding benchmarks, including 3D point cloud segmentation on ScanNet v2 and S3DIS and 3D point cloud classification on ModelNet40. Our code will be available at https://github.com/Gofinge/PointTransformerV2. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05666v2-abstract-full').style.display = 'none'; document.getElementById('2210.05666v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.08503">arXiv:2209.08503</a> <span> [<a href="https://arxiv.org/pdf/2209.08503">pdf</a>, <a href="https://arxiv.org/format/2209.08503">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Revisiting Rolling Shutter Bundle Adjustment: Toward Accurate and Fast Solution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liao%2C+B">Bangyan Liao</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+D">Delin Qu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yifei Xue</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huiqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.08503v3-abstract-short" style="display: inline;"> We propose a robust and fast bundle 
adjustment solution that estimates the 6-DoF pose of the camera and the geometry of the environment based on measurements from a rolling shutter (RS) camera. This tackles the challenges in the existing works, namely relying on additional sensors, high frame rate video as input, restrictive assumptions on camera motion, readout direction, and poor efficiency. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08503v3-abstract-full').style.display = 'inline'; document.getElementById('2209.08503v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.08503v3-abstract-full" style="display: none;"> We propose a robust and fast bundle adjustment solution that estimates the 6-DoF pose of the camera and the geometry of the environment based on measurements from a rolling shutter (RS) camera. This tackles the challenges in the existing works, namely relying on additional sensors, high frame rate video as input, restrictive assumptions on camera motion, readout direction, and poor efficiency. To this end, we first investigate the influence of normalization to the image point on RSBA performance and show its better approximation in modelling the real 6-DoF camera motion. Then we present a novel analytical model for the visual residual covariance, which can be used to standardize the reprojection error during the optimization, consequently improving the overall accuracy. More importantly, the combination of normalization and covariance standardization weighting in RSBA (NW-RSBA) can avoid common planar degeneracy without needing to constrain the filming manner. Besides, we propose an acceleration strategy for NW-RSBA based on the sparsity of its Jacobian matrix and Schur complement. The extensive synthetic and real data experiments verify the effectiveness and efficiency of the proposed solution over the state-of-the-art works. 
We also demonstrate the proposed method can be easily implemented and plug-in famous GSSfM and GSSLAM systems as completed RSSfM and RSSLAM solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08503v3-abstract-full').style.display = 'none'; document.getElementById('2209.08503v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.14270">arXiv:2208.14270</a> <span> [<a href="https://arxiv.org/pdf/2208.14270">pdf</a>, <a href="https://arxiv.org/format/2208.14270">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Integral Sampler and Polynomial Multiplication Architecture for Lattice-based Cryptography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Antian Wang</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+W">Weihang Tan</a>, <a href="/search/cs?searchtype=author&query=Parhi%2C+K+K">Keshab K. 
Parhi</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.14270v1-abstract-short" style="display: inline;"> With the surge of the powerful quantum computer, lattice-based cryptography proliferated the latest cryptography hardware implementation due to its resistance against quantum computers. Among the computational blocks of lattice-based cryptography, the random errors produced by the sampler play a key role in ensuring the security of these schemes. This paper proposes an integral architecture for th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.14270v1-abstract-full').style.display = 'inline'; document.getElementById('2208.14270v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.14270v1-abstract-full" style="display: none;"> With the surge of the powerful quantum computer, lattice-based cryptography proliferated the latest cryptography hardware implementation due to its resistance against quantum computers. Among the computational blocks of lattice-based cryptography, the random errors produced by the sampler play a key role in ensuring the security of these schemes. This paper proposes an integral architecture for the sampler, which can reduce the overall resource consumption by reusing the multipliers and adders within the modular polynomial computation. For instance, our experimental results show that the proposed design can effectively reduce the discrete Ziggurat sampling method in DSP usage. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.14270v1-abstract-full').style.display = 'none'; document.getElementById('2208.14270v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, accepted by 35th IEEE Int. Symposium on Defect and Fault Tolerance in VLSI and Nanotechnology Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.13361">arXiv:2208.13361</a> <span> [<a href="https://arxiv.org/pdf/2208.13361">pdf</a>, <a href="https://arxiv.org/format/2208.13361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> NL2GDPR: Automatically Develop GDPR Compliant Android Application Features from Natural Language </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shezan%2C+F+H">Faysal Hossain Shezan</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+M">Minlong Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mingming Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.13361v1-abstract-short" style="display: inline;"> The recent privacy leakage incidences and the more strict policy regulations demand a much higher standard of compliance for companies and mobile apps. However, such obligations also impose significant challenges on app developers for complying with these regulations that contain various perspectives, activities, and roles, especially for small companies and developers who are less experienced in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.13361v1-abstract-full').style.display = 'inline'; document.getElementById('2208.13361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.13361v1-abstract-full" style="display: none;"> The recent privacy leakage incidences and the more strict policy regulations demand a much higher standard of compliance for companies and mobile apps. However, such obligations also impose significant challenges on app developers for complying with these regulations that contain various perspectives, activities, and roles, especially for small companies and developers who are less experienced in this matter or with limited resources. To address these hurdles, we develop an automatic tool, NL2GDPR, which can generate policies from natural language descriptions from the developer while also ensuring the app's functionalities are compliant with General Data Protection Regulation (GDPR). NL2GDPR is developed by leveraging an information extraction tool, OIA (Open Information Annotation), developed by Baidu Cognitive Computing Lab. At the core, NL2GDPR is a privacy-centric information extraction model, appended with a GDPR policy finder and a policy generator. 
We perform a comprehensive study to grasp the challenges in extracting privacy-centric information and generating privacy policies, while exploiting optimizations for this specific task. With NL2GDPR, we can achieve 92.9%, 95.2%, and 98.4% accuracy in correctly identifying GDPR policies related to personal data storage, process, and share types, respectively. To the best of our knowledge, NL2GDPR is the first tool that allows a developer to automatically generate GDPR compliant policies, with only the need of entering the natural language for describing the app features. Note that other non-GDPR-related features might be integrated with the generated features to build a complex app. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.13361v1-abstract-full').style.display = 'none'; document.getElementById('2208.13361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.07678">arXiv:2208.07678</a> <span> [<a href="https://arxiv.org/pdf/2208.07678">pdf</a>, <a href="https://arxiv.org/format/2208.07678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FEC: Fast Euclidean Clustering for Point Cloud Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yu Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yancheng Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yifei Xue</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huiqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yizhen Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.07678v2-abstract-short" style="display: inline;"> Segmentation from point cloud data is essential in many applications such as remote sensing, mobile robots, or autonomous cars. However, the point clouds captured by the 3D range sensor are commonly sparse and unstructured, challenging efficient segmentation. In this paper, we present a fast solution to point cloud instance segmentation with small computational demands. 
To this end, we propose a n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.07678v2-abstract-full').style.display = 'inline'; document.getElementById('2208.07678v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.07678v2-abstract-full" style="display: none;"> Segmentation from point cloud data is essential in many applications such as remote sensing, mobile robots, or autonomous cars. However, the point clouds captured by the 3D range sensor are commonly sparse and unstructured, challenging efficient segmentation. In this paper, we present a fast solution to point cloud instance segmentation with small computational demands. To this end, we propose a novel fast Euclidean clustering (FEC) algorithm which applies a pointwise scheme over the clusterwise scheme used in existing works. Our approach is conceptually simple, easy to implement (40 lines in C++), and achieves two orders of magnitudes faster against the classical segmentation methods while producing high-quality results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.07678v2-abstract-full').style.display = 'none'; document.getElementById('2208.07678v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.12381">arXiv:2206.12381</a> <span> [<a href="https://arxiv.org/pdf/2206.12381">pdf</a>, <a href="https://arxiv.org/format/2206.12381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Defending Backdoor Attacks on Vision Transformer via Patch Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Doan%2C+K+D">Khoa D. Doan</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.12381v2-abstract-short" style="display: inline;"> Vision Transformers (ViTs) have a radically different architecture with significantly less inductive bias than Convolutional Neural Networks. Along with the improvement in performance, security and robustness of ViTs are also of great importance to study. 
In contrast to many recent works that exploit the robustness of ViTs against adversarial examples, this paper investigates a representative caus… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12381v2-abstract-full').style.display = 'inline'; document.getElementById('2206.12381v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.12381v2-abstract-full" style="display: none;"> Vision Transformers (ViTs) have a radically different architecture with significantly less inductive bias than Convolutional Neural Networks. Along with the improvement in performance, security and robustness of ViTs are also of great importance to study. In contrast to many recent works that exploit the robustness of ViTs against adversarial examples, this paper investigates a representative causative attack, i.e., backdoor. We first examine the vulnerability of ViTs against various backdoor attacks and find that ViTs are also quite vulnerable to existing attacks. However, we observe that the clean-data accuracy and backdoor attack success rate of ViTs respond distinctively to patch transformations before the positional encoding. Then, based on this finding, we propose an effective method for ViTs to defend both patch-based and blending-based trigger backdoor attacks via patch processing. The performances are evaluated on several benchmark datasets, including CIFAR10, GTSRB, and TinyImageNet, which show the proposed novel defense is very successful in mitigating backdoor attacks for ViTs. To the best of our knowledge, this paper presents the first defensive strategy that utilizes a unique characteristic of ViTs against backdoor attacks. The paper will appear in the Proceedings of the AAAI'23 Conference. This work was initially submitted in November 2021 to CVPR'22, then it was re-submitted to ECCV'22. The paper was made public in June 2022. 
The authors sincerely thank all the referees from the Program Committees of CVPR'22, ECCV'22, and AAAI'23. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12381v2-abstract-full').style.display = 'none'; document.getElementById('2206.12381v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.15444">arXiv:2205.15444</a> <span> [<a href="https://arxiv.org/pdf/2205.15444">pdf</a>, <a href="https://arxiv.org/format/2205.15444">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Integrity Authentication in Tree Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weijie Zhao</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.15444v2-abstract-short" style="display: inline;"> Tree models are very widely used in practice of machine learning and data mining. In this paper, we study the problem of model integrity authentication in tree models. 
In general, the task of model integrity authentication is the design &amp; implementation of mechanisms for checking/detecting whether the model deployed for the end-users has been tampered with or compromised, e.g., malicious modifica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.15444v2-abstract-full').style.display = 'inline'; document.getElementById('2205.15444v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.15444v2-abstract-full" style="display: none;"> Tree models are very widely used in practice of machine learning and data mining. In this paper, we study the problem of model integrity authentication in tree models. In general, the task of model integrity authentication is the design &amp; implementation of mechanisms for checking/detecting whether the model deployed for the end-users has been tampered with or compromised, e.g., malicious modifications on the model. We propose an authentication framework that enables the model builders/distributors to embed a signature to the tree model and authenticate the existence of the signature by only making a small number of black-box queries to the model. To the best of our knowledge, this is the first study of signature embedding on tree models. Our proposed method simply locates a collection of leaves and modifies their prediction values, which does not require any training/testing data nor any re-training. The experiments on a large number of public classification datasets confirm that the proposed signature embedding process has a high success rate while only introducing a minimal prediction accuracy loss.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.15444v2-abstract-full').style.display = 'none'; document.getElementById('2205.15444v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.12127">arXiv:2110.12127</a> <span> [<a href="https://arxiv.org/pdf/2110.12127">pdf</a>, <a href="https://arxiv.org/format/2110.12127">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TC.2023.3251847">10.1109/TC.2023.3251847 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> High-Speed VLSI Architectures for Modular Polynomial Multiplication via Fast Filtering and Applications to Lattice-Based Cryptography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+W">Weihang Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+A">Antian Wang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinmiao Zhang</a>, <a 
href="/search/cs?searchtype=author&query=Parhi%2C+K+K">Keshab K. Parhi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.12127v2-abstract-short" style="display: inline;"> This paper presents a low-latency hardware accelerator for modular polynomial multiplication for lattice-based post-quantum cryptography and homomorphic encryption applications. The proposed novel modular polynomial multiplier exploits the fast finite impulse response (FIR) filter architecture to reduce the computational complexity of the schoolbook modular polynomial multiplication. We also exten… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.12127v2-abstract-full').style.display = 'inline'; document.getElementById('2110.12127v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.12127v2-abstract-full" style="display: none;"> This paper presents a low-latency hardware accelerator for modular polynomial multiplication for lattice-based post-quantum cryptography and homomorphic encryption applications. The proposed novel modular polynomial multiplier exploits the fast finite impulse response (FIR) filter architecture to reduce the computational complexity of the schoolbook modular polynomial multiplication. We also extend this structure to fast $M$-parallel architectures while achieving low-latency, high-speed, and full hardware utilization. We comprehensively evaluate the performance of the proposed architectures under various polynomial settings as well as in the Saber scheme for post-quantum cryptography as a case study. The experimental results show that our proposed modular polynomial multiplier reduces the computation time and area-time product, respectively, compared to the state-of-the-art designs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.12127v2-abstract-full').style.display = 'none'; document.getElementById('2110.12127v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Trans. on Computers, 72(9), pp. 2454-2466, Sept. 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.00511">arXiv:2110.00511</a> <span> [<a href="https://arxiv.org/pdf/2110.00511">pdf</a>, <a href="https://arxiv.org/format/2110.00511">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> ASH: A Modern Framework for Parallel Spatial Hashing in 3D Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+W">Wei Dong</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Kaess%2C+M">Michael Kaess</a>, <a href="/search/cs?searchtype=author&query=Koltun%2C+V">Vladlen Koltun</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.00511v2-abstract-short" style="display: inline;"> We present ASH, a modern and high-performance framework for parallel spatial hashing on GPU. Compared to existing GPU hash map implementations, ASH achieves higher performance, supports richer functionality, and requires fewer lines of code (LoC) when used for implementing spatially varying operations from volumetric geometry reconstruction to differentiable appearance reconstruction. Unlike exist… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.00511v2-abstract-full').style.display = 'inline'; document.getElementById('2110.00511v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.00511v2-abstract-full" style="display: none;"> We present ASH, a modern and high-performance framework for parallel spatial hashing on GPU. Compared to existing GPU hash map implementations, ASH achieves higher performance, supports richer functionality, and requires fewer lines of code (LoC) when used for implementing spatially varying operations from volumetric geometry reconstruction to differentiable appearance reconstruction. Unlike existing GPU hash maps, the ASH framework provides a versatile tensor interface, hiding low-level details from the users. In addition, by decoupling the internal hashing data structures and key-value data in buffers, we offer direct access to spatially varying data via indices, enabling seamless integration to modern libraries such as PyTorch. 
To achieve this, we 1) detach stored key-value data from the low-level hash map implementation; 2) bridge the pointer-first low level data structures to index-first high-level tensor interfaces via an index heap; 3) adapt both generic and non-generic integer-only hash map implementations as backends to operate on multi-dimensional keys. We first profile our hash map against state-of-the-art hash maps on synthetic data to show the performance gain from this architecture. We then show that ASH can consistently achieve higher performance on various large-scale 3D perception tasks with fewer LoC by showcasing several applications, including 1) point cloud voxelization, 2) retargetable volumetric scene reconstruction, 3) non-rigid point cloud registration and volumetric deformation, and 4) spatially varying geometry and appearance refinement. ASH and its example applications are open sourced in Open3D (http://www.open3d.org). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.00511v2-abstract-full').style.display = 'none'; document.getElementById('2110.00511v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 19 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.00047">arXiv:2008.00047</a> <span> [<a href="https://arxiv.org/pdf/2008.00047">pdf</a>, <a href="https://arxiv.org/format/2008.00047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Towards Class-Oriented Poisoning Attacks Against Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bingyin Zhao</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.00047v2-abstract-short" style="display: inline;"> Poisoning attacks on machine learning systems compromise the model performance by deliberately injecting malicious samples in the training dataset to influence the training process. Prior works focus on either availability attacks (i.e., lowering the overall model accuracy) or integrity attacks (i.e., enabling specific instance-based backdoor). 
In this paper, we advance the adversarial objectives… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.00047v2-abstract-full').style.display = 'inline'; document.getElementById('2008.00047v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.00047v2-abstract-full" style="display: none;"> Poisoning attacks on machine learning systems compromise the model performance by deliberately injecting malicious samples in the training dataset to influence the training process. Prior works focus on either availability attacks (i.e., lowering the overall model accuracy) or integrity attacks (i.e., enabling specific instance-based backdoor). In this paper, we advance the adversarial objectives of the availability attacks to a per-class basis, which we refer to as class-oriented poisoning attacks. We demonstrate that the proposed attack is capable of forcing the corrupted model to predict in two specific ways: (i) classify unseen new images to a targeted "supplanter" class, and (ii) misclassify images from a "victim" class while maintaining the classification accuracy on other non-victim classes. To maximize the adversarial effect as well as reduce the computational complexity of poisoned data generation, we propose a gradient-based framework that crafts poisoning images with carefully manipulated feature information for each scenario. Using newly defined metrics at the class level, we demonstrate the effectiveness of the proposed class-oriented poisoning attacks on various models (e.g., LeNet-5, Vgg-9, and ResNet-50) over a wide range of datasets (e.g., MNIST, CIFAR-10, and ImageNet-ILSVRC2012) in an end-to-end training setting. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.00047v2-abstract-full').style.display = 'none'; document.getElementById('2008.00047v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 9 figures, accepted by Winter Conference on Applications of Computer Vision (WACV) 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.11688">arXiv:1903.11688</a> <span> [<a href="https://arxiv.org/pdf/1903.11688">pdf</a>, <a href="https://arxiv.org/format/1903.11688">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rallying Adversarial Techniques against Deep Learning for Network Security </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clements%2C+J">Joseph Clements</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuzhe Yang</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Ankur Sharma</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Hongxin Hu</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.11688v2-abstract-short" style="display: inline;"> Recent advances in artificial intelligence and the increasing need for powerful defensive measures in the domain of network security, have led to the adoption of deep learning approaches for use in network intrusion detection systems. These methods have achieved superior performance against conventional network attacks, which enable the deployment of practical security systems to unique and dynami… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11688v2-abstract-full').style.display = 'inline'; document.getElementById('1903.11688v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.11688v2-abstract-full" style="display: none;"> Recent advances in artificial intelligence and the increasing need for powerful defensive measures in the domain of network security, have led to the adoption of deep learning approaches for use in network intrusion detection systems. These methods have achieved superior performance against conventional network attacks, which enable the deployment of practical security systems to unique and dynamic sectors. Adversarial machine learning, unfortunately, has recently shown that deep learning models are inherently vulnerable to adversarial modifications on their input data. Because of this susceptibility, the deep learning models deployed to power a network defense could in fact be the weakest entry point for compromising a network system. In this paper, we show that by modifying on average as little as 1.38 of the input features, an adversary can generate malicious inputs which effectively fool a deep learning based NIDS. 
Therefore, when designing such systems, it is crucial to consider the performance from not only the conventional network security perspective but also the adversarial machine learning domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11688v2-abstract-full').style.display = 'none'; document.getElementById('1903.11688v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by IEEE Symposium Series on Computational Intelligence (IEEE SSCI 2021)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.10121">arXiv:1810.10121</a> <span> [<a href="https://arxiv.org/pdf/1810.10121">pdf</a>, <a href="https://arxiv.org/format/1810.10121">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> nGraph-HE: A Graph Compiler for Deep Learning on Homomorphically Encrypted Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Boemer%2C+F">Fabian Boemer</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Cammarota%2C+R">Rosario Cammarota</a>, <a href="/search/cs?searchtype=author&query=Wierzynski%2C+C">Casimir Wierzynski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.10121v3-abstract-short" style="display: inline;"> Homomorphic encryption (HE)—the ability to perform computation on encrypted data—is an attractive remedy to increasing concerns about data privacy in deep learning (DL). However, building DL models that operate on ciphertext is currently labor-intensive and requires simultaneous expertise in DL, cryptography, and software engineering. DL frameworks and recent advances in graph compilers have g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.10121v3-abstract-full').style.display = 'inline'; document.getElementById('1810.10121v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.10121v3-abstract-full" style="display: none;"> Homomorphic encryption (HE)—the ability to perform computation on encrypted data—is an attractive remedy to increasing concerns about data privacy in deep learning (DL). However, building DL models that operate on ciphertext is currently labor-intensive and requires simultaneous expertise in DL, cryptography, and software engineering. DL frameworks and recent advances in graph compilers have greatly accelerated the training and deployment of DL models to various computing platforms. We introduce nGraph-HE, an extension of nGraph, Intel's DL graph compiler, which enables deployment of trained models with popular frameworks such as TensorFlow while simply treating HE as another hardware target. Our graph-compiler approach enables HE-aware optimizations—implemented at compile-time, such as constant folding and HE-SIMD packing, and at run-time, such as special value plaintext bypass. Furthermore, nGraph-HE integrates with DL frameworks such as TensorFlow, enabling data scientists to benchmark DL models with minimal overhead.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.10121v3-abstract-full').style.display = 'none'; document.getElementById('1810.10121v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.05768">arXiv:1806.05768</a> <span> [<a href="https://arxiv.org/pdf/1806.05768">pdf</a>, <a href="https://arxiv.org/format/1806.05768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hardware Trojan Attacks on Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clements%2C+J">Joseph Clements</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yingjie Lao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.05768v1-abstract-short" style="display: inline;"> With the rising popularity of machine learning and the ever increasing demand for computational power, there is a growing need for hardware optimized implementations of neural networks and other machine learning models. 
As the technology evolves, it is also plausible that machine learning or artificial intelligence will soon become consumer electronic products and military equipment, in the form o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.05768v1-abstract-full').style.display = 'inline'; document.getElementById('1806.05768v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.05768v1-abstract-full" style="display: none;"> With the rising popularity of machine learning and the ever increasing demand for computational power, there is a growing need for hardware optimized implementations of neural networks and other machine learning models. As the technology evolves, it is also plausible that machine learning or artificial intelligence will soon become consumer electronic products and military equipment, in the form of well-trained models. Unfortunately, the modern fabless business model of manufacturing hardware, while economic, leads to deficiencies in security through the supply chain. In this paper, we illuminate these security issues by introducing hardware Trojan attacks on neural networks, expanding the current taxonomy of neural network security to incorporate attacks of this nature. To aid in this, we develop a novel framework for inserting malicious hardware Trojans in the implementation of a neural network classifier. We evaluate the capabilities of the adversary in this setting by implementing the attack algorithm on convolutional neural networks while controlling a variety of parameters available to the adversary. Our experimental results show that the proposed algorithm could effectively classify a selected input trigger as a specified class on the MNIST dataset by injecting hardware Trojans into $0.03\%$, on average, of neurons in the 5th hidden layer of arbitrary 7-layer convolutional neural networks, while undetectable under the test data. 
Finally, we discuss the potential defenses to protect neural networks against hardware Trojan attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.05768v1-abstract-full').style.display = 'none'; document.getElementById('1806.05768v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1804.10911">arXiv:1804.10911</a> <span> [<a href="https://arxiv.org/pdf/1804.10911">pdf</a>, <a href="https://arxiv.org/ps/1804.10911">ps</a>, <a href="https://arxiv.org/format/1804.10911">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Tree Search Algorithm for Sequence Labeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yadi Lao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jun Xu</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+Y">Yanyan Lan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jiafeng Guo</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Sheng Gao</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xueqi Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1804.10911v2-abstract-short" style="display: inline;"> In this paper we propose a novel reinforcement 
learning based model for sequence tagging, referred to as MM-Tag. Inspired by the success and methodology of the AlphaGo Zero, MM-Tag formalizes the problem of sequence tagging with a Monte Carlo tree search (MCTS) enhanced Markov decision process (MDP) model, in which the time steps correspond to the positions of words in a sentence from left to righ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.10911v2-abstract-full').style.display = 'inline'; document.getElementById('1804.10911v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1804.10911v2-abstract-full" style="display: none;"> In this paper we propose a novel reinforcement learning based model for sequence tagging, referred to as MM-Tag. Inspired by the success and methodology of the AlphaGo Zero, MM-Tag formalizes the problem of sequence tagging with a Monte Carlo tree search (MCTS) enhanced Markov decision process (MDP) model, in which the time steps correspond to the positions of words in a sentence from left to right, and each action corresponds to assigning a tag to a word. Two long short-term memory networks (LSTM) are used to summarize the past tag assignments and words in the sentence. Based on the outputs of LSTMs, the policy for guiding the tag assignment and the value for predicting the tagging accuracy of the whole sentence are produced. The policy and value are then strengthened with MCTS, which takes the produced raw policy and value as inputs, simulates and evaluates the possible tag assignments at the subsequent positions, and outputs a better search policy for assigning tags. A reinforcement learning algorithm is proposed to train the model parameters. Our work is the first to apply the MCTS enhanced MDP model to the sequence tagging task. We show that MM-Tag can accurately predict the tags thanks to the exploratory decision making mechanism introduced by MCTS.
Experimental results based on a chunking benchmark showed that MM-Tag outperformed the state-of-the-art sequence tagging baselines including CRF and CRF with LSTM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.10911v2-abstract-full').style.display = 'none'; document.getElementById('1804.10911v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 April, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.08058">arXiv:1801.08058</a> <span> [<a href="https://arxiv.org/pdf/1801.08058">pdf</a>, <a href="https://arxiv.org/format/1801.08058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Intel nGraph: An Intermediate Representation, Compiler, and Executor for Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cyphers%2C+S">Scott Cyphers</a>, <a href="/search/cs?searchtype=author&query=Bansal%2C+A+K">Arjun K.
Bansal</a>, <a href="/search/cs?searchtype=author&query=Bhiwandiwalla%2C+A">Anahita Bhiwandiwalla</a>, <a href="/search/cs?searchtype=author&query=Bobba%2C+J">Jayaram Bobba</a>, <a href="/search/cs?searchtype=author&query=Brookhart%2C+M">Matthew Brookhart</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+A">Avijit Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Constable%2C+W">Will Constable</a>, <a href="/search/cs?searchtype=author&query=Convey%2C+C">Christian Convey</a>, <a href="/search/cs?searchtype=author&query=Cook%2C+L">Leona Cook</a>, <a href="/search/cs?searchtype=author&query=Kanawi%2C+O">Omar Kanawi</a>, <a href="/search/cs?searchtype=author&query=Kimball%2C+R">Robert Kimball</a>, <a href="/search/cs?searchtype=author&query=Knight%2C+J">Jason Knight</a>, <a href="/search/cs?searchtype=author&query=Korovaiko%2C+N">Nikolay Korovaiko</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+V">Varun Kumar</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+Y">Yixing Lao</a>, <a href="/search/cs?searchtype=author&query=Lishka%2C+C+R">Christopher R. Lishka</a>, <a href="/search/cs?searchtype=author&query=Menon%2C+J">Jaikrishnan Menon</a>, <a href="/search/cs?searchtype=author&query=Myers%2C+J">Jennifer Myers</a>, <a href="/search/cs?searchtype=author&query=Narayana%2C+S+A">Sandeep Aswath Narayana</a>, <a href="/search/cs?searchtype=author&query=Procter%2C+A">Adam Procter</a>, <a href="/search/cs?searchtype=author&query=Webb%2C+T+J">Tristan J. Webb</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1801.08058v2-abstract-short" style="display: inline;"> The Deep Learning (DL) community sees many novel topologies published each year. Achieving high performance on each new topology remains challenging, as each requires some level of manual effort. 
This issue is compounded by the proliferation of frameworks and hardware platforms. The current approach, which we call "direct optimization", requires deep changes within each framework to improve the tr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.08058v2-abstract-full').style.display = 'inline'; document.getElementById('1801.08058v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1801.08058v2-abstract-full" style="display: none;"> The Deep Learning (DL) community sees many novel topologies published each year. Achieving high performance on each new topology remains challenging, as each requires some level of manual effort. This issue is compounded by the proliferation of frameworks and hardware platforms. The current approach, which we call "direct optimization", requires deep changes within each framework to improve the training performance for each hardware backend (CPUs, GPUs, FPGAs, ASICs) and requires $\mathcal{O}(fp)$ effort; where $f$ is the number of frameworks and $p$ is the number of platforms. While optimized kernels for deep-learning primitives are provided via libraries like Intel Math Kernel Library for Deep Neural Networks (MKL-DNN), there are several compiler-inspired ways in which performance can be further optimized. Building on our experience creating neon (a fast deep learning library on GPUs), we developed Intel nGraph, a soon to be open-sourced C++ library to simplify the realization of optimized deep learning performance across frameworks and hardware platforms. Initially-supported frameworks include TensorFlow, MXNet, and Intel neon framework. Initial backends are Intel Architecture CPUs (CPU), the Intel(R) Nervana Neural Network Processor(R) (NNP), and NVIDIA GPUs. Currently supported compiler optimizations include efficient memory management and data layout abstraction. 
In this paper, we describe our overall architecture and its core components. In the future, we envision extending nGraph API support to a wider range of frameworks, hardware (including FPGAs and ASICs), and compiler optimizations (training versus inference optimizations, multi-node and multi-device scaling via efficient sub-graph partitioning, and HW-specific compounding of operations). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.08058v2-abstract-full').style.display = 'none'; document.getElementById('1801.08058v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018. </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 
173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 
5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>Slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository