Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 58 results for author: <span class="mathjax">Ye, N</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ye%2C+N">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Ye, N"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ye%2C+N&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ye, N"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ye%2C+N&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ye%2C+N&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ye%2C+N&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12276">arXiv:2411.12276</a> <span> [<a href="https://arxiv.org/pdf/2411.12276">pdf</a>, <a href="https://arxiv.org/format/2411.12276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> libcll: an Extendable Python Toolkit for Complementary-Label Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nai-Xuan Ye</a>, <a href="/search/cs?searchtype=author&query=Mai%2C+T">Tan-Ha Mai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hsiu-Hsuan Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wei-I Lin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hsuan-Tien Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12276v1-abstract-short" style="display: inline;"> Complementary-label learning (CLL) is a weakly supervised learning paradigm for multiclass classification, where only complementary labels -- indicating classes an instance does not belong to -- are provided to the learning algorithm. 
Despite CLL's increasing popularity, previous studies highlight two main challenges: (1) inconsistent results arising from varied assumptions on complementary label… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12276v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12276v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12276v1-abstract-full" style="display: none;"> Complementary-label learning (CLL) is a weakly supervised learning paradigm for multiclass classification, where only complementary labels -- indicating classes an instance does not belong to -- are provided to the learning algorithm. Despite CLL's increasing popularity, previous studies highlight two main challenges: (1) inconsistent results arising from varied assumptions on complementary label generation, and (2) high barriers to entry due to the lack of a standardized evaluation platform across datasets and algorithms. To address these challenges, we introduce \texttt{libcll}, an extensible Python toolkit for CLL research. \texttt{libcll} provides a universal interface that supports a wide range of generation assumptions, both synthetic and real-world datasets, and key CLL algorithms. The toolkit is designed to mitigate inconsistencies and streamline the research process, with easy installation, comprehensive usage guides, and quickstart tutorials that facilitate efficient adoption and implementation of CLL techniques. Extensive ablation studies conducted with \texttt{libcll} demonstrate its utility in generating valuable insights to advance future CLL research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12276v1-abstract-full').style.display = 'none'; document.getElementById('2411.12276v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06634">arXiv:2411.06634</a> <span> [<a href="https://arxiv.org/pdf/2411.06634">pdf</a>, <a href="https://arxiv.org/format/2411.06634">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Inductive Graph Few-shot Class Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yayong Li</a>, <a href="/search/cs?searchtype=author&query=Moghadam%2C+P">Peyman Moghadam</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+C">Can Peng</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a>, <a href="/search/cs?searchtype=author&query=Koniusz%2C+P">Piotr Koniusz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06634v1-abstract-short" style="display: inline;"> Node classification with Graph Neural Networks (GNN) under a fixed set of labels is well known in contrast to Graph Few-Shot Class Incremental Learning (GFSCIL), which involves learning a GNN classifier as graph nodes and classes growing over time sporadically. We introduce inductive GFSCIL that continually learns novel classes with newly emerging nodes while maintaining performance on old classes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06634v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06634v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06634v1-abstract-full" style="display: none;"> Node classification with Graph Neural Networks (GNN) under a fixed set of labels is well known in contrast to Graph Few-Shot Class Incremental Learning (GFSCIL), which involves learning a GNN classifier as graph nodes and classes growing over time sporadically. We introduce inductive GFSCIL that continually learns novel classes with newly emerging nodes while maintaining performance on old classes without accessing previous data. This addresses the practical concern of transductive GFSCIL, which requires storing the entire graph with historical data. Compared to the transductive GFSCIL, the inductive setting exacerbates catastrophic forgetting due to inaccessible previous data during incremental training, in addition to overfitting issue caused by label sparsity. Thus, we propose a novel method, called Topology-based class Augmentation and Prototype calibration (TAP). To be specific, it first creates a triple-branch multi-topology class augmentation method to enhance model generalization ability. As each incremental session receives a disjoint subgraph with nodes of novel classes, the multi-topology class augmentation method helps replicate such a setting in the base session to boost backbone versatility. In incremental learning, given the limited number of novel class samples, we propose an iterative prototype calibration to improve the separation of class prototypes. 

2. arXiv:2411.06634 [pdf, other]
   Inductive Graph Few-shot Class Incremental Learning
   Authors: Yayong Li, Peyman Moghadam, Can Peng, Nan Ye, Piotr Koniusz
   Subjects: Machine Learning (cs.LG)
   Abstract: Node classification with Graph Neural Networks (GNN) under a fixed set of labels is well studied, in contrast to Graph Few-Shot Class Incremental Learning (GFSCIL), which involves learning a GNN classifier as graph nodes and classes grow sporadically over time. We introduce inductive GFSCIL, which continually learns novel classes with newly emerging nodes while maintaining performance on old classes without accessing previous data. This addresses the practical concern of transductive GFSCIL, which requires storing the entire graph with historical data. Compared to transductive GFSCIL, the inductive setting exacerbates catastrophic forgetting due to inaccessible previous data during incremental training, in addition to the overfitting issue caused by label sparsity. Thus, we propose a novel method called Topology-based class Augmentation and Prototype calibration (TAP). Specifically, it first creates a triple-branch multi-topology class augmentation method to enhance model generalization ability. As each incremental session receives a disjoint subgraph with nodes of novel classes, the multi-topology class augmentation method helps replicate such a setting in the base session to boost backbone versatility. In incremental learning, given the limited number of novel class samples, we propose an iterative prototype calibration to improve the separation of class prototypes. Furthermore, as backbone fine-tuning causes feature distribution drift and prototypes of old classes degrade over time, we propose a prototype shift method for old classes to compensate for the drift. We showcase the proposed method on four datasets.
   Submitted 10 November, 2024; originally announced November 2024.
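
Prototype-based incremental classification, which the abstract builds on, reduces to computing class prototypes as mean embeddings and classifying by nearest prototype. The sketch below illustrates that generic idea plus a naive drift compensation, under assumed names and a simulated drift; it is not the TAP method itself.

    import numpy as np

    def class_prototypes(embeddings: np.ndarray, labels: np.ndarray) -> dict[int, np.ndarray]:
        """Prototype of each class = mean of its node embeddings."""
        return {c: embeddings[labels == c].mean(axis=0) for c in np.unique(labels)}

    def nearest_prototype_predict(embeddings: np.ndarray, protos: dict[int, np.ndarray]) -> np.ndarray:
        classes = np.array(sorted(protos))
        proto_mat = np.stack([protos[c] for c in classes])          # (C, d)
        dists = np.linalg.norm(embeddings[:, None, :] - proto_mat[None], axis=-1)
        return classes[dists.argmin(axis=1)]

    # Toy usage: 2-D embeddings for three classes, then a simulated feature drift
    # compensated by shifting old prototypes by the same drift vector.
    rng = np.random.default_rng(0)
    emb = np.concatenate([rng.normal(c, 0.3, size=(20, 2)) for c in range(3)])
    lab = np.repeat([0, 1, 2], 20)
    protos = class_prototypes(emb, lab)

    drift = np.array([0.5, -0.2])                                   # backbone fine-tuning shifts features
    shifted_protos = {c: p + drift for c, p in protos.items()}      # illustrative prototype shift
    preds = nearest_prototype_predict(emb + drift, shifted_protos)
    print("accuracy after drift compensation:", (preds == lab).mean())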

3. arXiv:2409.20296 [pdf, other]
   PersonalLLM: Tailoring LLMs to Individual Preferences
   Authors: Thomas P. Zollo, Andrew Wei Tung Siah, Naimeng Ye, Ang Li, Hongseok Namkoong
   Subjects: Machine Learning (cs.LG); Computation and Language (cs.CL)
   Abstract: As LLMs become capable of complex tasks, there is growing potential for personalized interactions tailored to the subtle and idiosyncratic preferences of the user. We present a public benchmark, PersonalLLM, focusing on adapting LLMs to provide maximal benefits for a particular user. Departing from existing alignment benchmarks that implicitly assume uniform preferences, we curate open-ended prompts paired with many high-quality answers over which users would be expected to display heterogeneous latent preferences. Instead of persona-prompting LLMs based on high-level attributes (e.g., user's race or response length), which yields homogeneous preferences relative to humans, we develop a method that can simulate a large user base with diverse preferences from a set of pre-trained reward models. Our dataset and generated personalities offer an innovative testbed for developing personalization algorithms that grapple with continual data sparsity -- little relevant feedback from the particular user -- by leveraging historical data from other (similar) users. We explore basic in-context learning and meta-learning baselines to illustrate the utility of PersonalLLM and highlight the need for future methodological development. Our dataset is available at https://huggingface.co/datasets/namkoong-lab/PersonalLLM
   Submitted 30 September, 2024; originally announced September 2024.
   Comments: 28 pages, 6 figures
   ACM Class: I.2.7; I.2.6
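
The simulation idea described above, building many synthetic users from a fixed set of pre-trained reward models, can be sketched roughly as follows. The Dirichlet weighting and all names are assumptions for illustration, not the benchmark's actual construction.

    import numpy as np

    rng = np.random.default_rng(0)

    def simulate_users(num_users: int, num_reward_models: int, concentration: float = 1.0) -> np.ndarray:
        """Each simulated user is a Dirichlet-sampled weighting over base reward models."""
        return rng.dirichlet(np.full(num_reward_models, concentration), size=num_users)

    def pick_preferred(user_weights: np.ndarray, reward_scores: np.ndarray) -> int:
        """reward_scores: (num_reward_models, num_candidates) scores from the base models.
        A user's utility for a response is the weighted sum of base rewards."""
        utilities = user_weights @ reward_scores
        return int(utilities.argmax())

    # Toy usage: 3 base reward models scoring 4 candidate responses to one prompt.
    reward_scores = rng.normal(size=(3, 4))
    users = simulate_users(num_users=5, num_reward_models=3)
    for i, w in enumerate(users):
        print(f"user {i} prefers response {pick_preferred(w, reward_scores)}")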

4. arXiv:2409.15608 [pdf, other]
   Deep Learning Approach for Knee Point Detection on Noisy Data
   Authors: Ting Yan Fok, Nong Ye
   Subjects: Machine Learning (cs.LG)
   Abstract: A knee point on a curve is one where the curve levels off after an increase. In a computer system, it marks the point at which the system's performance is no longer improving significantly despite adding extra resources; a knee point therefore often represents an optimal point for decision making. However, identifying knee points in noisy data is a challenging task. All previous works defined knee points based on the data in the original scale. In this work, by contrast, we define knee points based on normalized data and provide a mathematical definition of curvature for normalized discrete data points, based on the mathematical definition of curvature for continuous functions. The impact of normalization on curvature and on the location of knee points is also discussed. However, assessing the effectiveness of methods is difficult in the absence of ground-truth data and benchmark datasets, which makes comparing existing methods challenging. In view of this, we create synthetic data that simulate real-world scenarios by selecting a set of functions that possess the required characteristics and then introducing noise that satisfies the underlying distribution. In addition, we present a deep-learning approach that employs a Convolutional Neural Network (CNN) with a U-Net-like architecture to accurately detect the knee point(s) of the underlying true distribution. The proposed model is evaluated against state-of-the-art methods. Experiments show that our network outperforms existing methods on all synthetic datasets, regardless of whether the samples have single or multiple knee points, and achieves the best $F_{1}$ scores among all existing methods on all test sets.
   Submitted 23 September, 2024; originally announced September 2024.
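
The curvature-on-normalized-data idea lends itself to a compact sketch: rescale both axes to [0, 1], estimate derivatives with finite differences, and take the curvature maximum as the knee. This applies the standard curvature formula to discrete points and is not the paper's CNN detector.

    import numpy as np

    def knee_by_curvature(x: np.ndarray, y: np.ndarray) -> int:
        """Return the index of the knee point, taken as the curvature maximum on normalized data."""
        # Normalize both axes to [0, 1] so curvature is scale-invariant.
        xn = (x - x.min()) / (x.max() - x.min())
        yn = (y - y.min()) / (y.max() - y.min())
        # Finite-difference derivatives of the normalized curve.
        dx, dy = np.gradient(xn), np.gradient(yn)
        ddx, ddy = np.gradient(dx), np.gradient(dy)
        # Curvature of a parametric curve: |x'y'' - y'x''| / (x'^2 + y'^2)^(3/2).
        curvature = np.abs(dx * ddy - dy * ddx) / np.power(dx**2 + dy**2, 1.5)
        return int(np.argmax(curvature))

    # Toy usage: a saturating curve y = x / (x + 1) with mild noise.
    x = np.linspace(0, 10, 200)
    y = x / (x + 1) + np.random.default_rng(0).normal(0, 0.005, x.size)
    idx = knee_by_curvature(x, y)
    print("knee index:", idx, "knee x ~", round(float(x[idx]), 2))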

5. arXiv:2408.03307 [pdf, other]
   Exchangeable Sequence Models Can Naturally Quantify Uncertainty Over Latent Concepts
   Authors: Naimeng Ye, Hanming Yang, Andrew Siah, Hongseok Namkoong
   Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)
   Abstract: Intelligent agents must be able to articulate their own uncertainty. In this work, we show that pre-trained sequence models are naturally capable of probabilistic reasoning over exchangeable data points -- forming informed beliefs and sharpening them as they gather more information. A sequence model learns the relationship between observations, which differs from typical Bayesian models that quantify uncertainty over latent parameters through priors and likelihoods (e.g., topic models). Despite the apparent difference, we illustrate how exchangeable sequence modeling provides a valid Bayesian model by going back to De Finetti's classical predictive view of probabilistic reasoning: uncertainty comes from data that has not been observed yet, rather than from latent parameters. From this perspective, pre-training autoregressive models is equivalent to formulating informed beliefs based on prior observations ("empirical Bayes"), and forward generation is equivalent to simulating instantiations of an environment ("posterior inference"). In particular, exchangeable sequence models can explicitly perform statistical inference; epistemic uncertainty over latent environments is captured by variation in predicted future observations. Formally, we show that the sequence prediction loss controls the quality of uncertainty quantification, and propose several approaches for encoding exchangeability in sequence model architectures: data augmentation, regularization, and causal masking.
   Submitted 11 November, 2024; v1 submitted 6 August, 2024; originally announced August 2024.
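
De Finetti's predictive view mentioned above can be made concrete with a toy predictive-resampling sketch: uncertainty about a latent coin bias is recovered purely by autoregressively simulating unseen future observations from the current one-step predictive. The Beta-Bernoulli predictive here stands in for a pre-trained sequence model; it is an analogue, not the paper's method.

    import numpy as np

    rng = np.random.default_rng(0)

    def predictive_resample(observed: list[int], horizon: int = 2000, prior: float = 1.0) -> float:
        """Autoregressively simulate future 0/1 observations; the long-run frequency of one
        rollout is one posterior sample of the latent rate (predictive view of inference)."""
        heads, n = sum(observed), len(observed)
        sims = []
        for _ in range(horizon):
            p_next = (heads + prior) / (n + 2 * prior)   # current one-step predictive
            x = rng.random() < p_next                    # "forward generation"
            heads += x
            n += 1
            sims.append(x)
        return (sum(observed) + sum(sims)) / n

    observed = [1, 1, 0, 1, 0, 1, 1, 0, 1, 1]            # 7 heads in 10 flips
    samples = np.array([predictive_resample(observed) for _ in range(500)])
    print("posterior mean ~", samples.mean().round(3),
          "90% interval ~", np.quantile(samples, [0.05, 0.95]).round(3))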

6. arXiv:2407.08554 [pdf, other]
   Establishing Rigorous and Cost-effective Clinical Trials for Artificial Intelligence Models
   Authors: Wanling Gao, Yunyou Huang, Dandan Cui, Zhuoming Yu, Wenjing Liu, Xiaoshuang Liang, Jiahui Zhao, Jiyue Xie, Hao Li, Li Ma, Ning Ye, Yumiao Kang, Dingfeng Luo, Peng Pan, Wei Huang, Zhongmou Liu, Jizhong Hu, Gangyuan Zhao, Chongrong Jiang, Fan Huang, Tianyi Wei, Suqin Tang, Bingjie Xia, Zhifei Zhang, Jianfeng Zhan
   Subjects: Artificial Intelligence (cs.AI); Human-Computer Interaction (cs.HC)
   Abstract: A profound gap persists between artificial intelligence (AI) and clinical practice in medicine, primarily due to the lack of rigorous and cost-effective evaluation methodologies. State-of-the-art and state-of-the-practice AI model evaluations are limited to laboratory studies on medical datasets or direct clinical trials with no or solely patient-centered controls. Moreover, the crucial role of clinicians in collaborating with AI, pivotal for determining its impact on clinical practice, is often overlooked. For the first time, we emphasize the critical necessity for rigorous and cost-effective evaluation methodologies for AI models in clinical practice, featuring patient/clinician-centered (dual-centered) AI randomized controlled trials (DC-AI RCTs) and virtual clinician-based in-silico trials (VC-MedAI) as an effective proxy for DC-AI RCTs. Leveraging 7500 diagnosis records from two-step inaugural DC-AI RCTs across 14 medical centers with 125 clinicians, our results demonstrate the necessity of DC-AI RCTs and the effectiveness of VC-MedAI. Notably, VC-MedAI performs comparably to human clinicians, replicating insights and conclusions from prospective DC-AI RCTs. We envision DC-AI RCTs and VC-MedAI as pivotal advancements, presenting innovative and transformative evaluation methodologies for AI models in clinical practice, offering a preclinical-like setting mirroring conventional medicine, and reshaping development paradigms in a cost-effective and fast-iterative manner. Chinese Clinical Trial Registration: ChiCTR2400086816.
   Submitted 28 July, 2024; v1 submitted 11 July, 2024; originally announced July 2024.
   Comments: 24 pages
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11234v2-abstract-full').style.display = 'none'; document.getElementById('2406.11234v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2403.07342</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07362">arXiv:2406.07362</a> <span> [<a href="https://arxiv.org/pdf/2406.07362">pdf</a>, <a href="https://arxiv.org/format/2406.07362">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> AI.vs.Clinician: Unveiling Intricate Interactions Between AI and Clinicians through an Open-Access Database </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+W">Wanling Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhuoming Yu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+D">Dandan Cui</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenjing Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaoshuang Liang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jiahui Zhao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jiyue Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yumiao Kang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dingfeng Luo</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+P">Peng Pan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhongmou Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jizhong Hu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fan Huang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Gangyuan Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chongrong Jiang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+T">Tianyi Wei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhifei Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yunyou Huang</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+J">Jianfeng Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07362v3-abstract-short" style="display: inline;"> Artificial Intelligence (AI) plays a crucial role in medical field and has the potential to revolutionize healthcare practices. 

8. arXiv:2406.07362 [pdf, other]
   AI.vs.Clinician: Unveiling Intricate Interactions Between AI and Clinicians through an Open-Access Database
   Authors: Wanling Gao, Yuan Liu, Zhuoming Yu, Dandan Cui, Wenjing Liu, Xiaoshuang Liang, Jiahui Zhao, Jiyue Xie, Hao Li, Li Ma, Ning Ye, Yumiao Kang, Dingfeng Luo, Peng Pan, Wei Huang, Zhongmou Liu, Jizhong Hu, Fan Huang, Gangyuan Zhao, Chongrong Jiang, Tianyi Wei, Zhifei Zhang, Yunyou Huang, Jianfeng Zhan
   Subjects: Human-Computer Interaction (cs.HC)
   Abstract: Artificial Intelligence (AI) plays a crucial role in the medical field and has the potential to revolutionize healthcare practices. However, the success of AI models and their impact hinge on the synergy between AI and medical specialists, with clinicians assuming a dominant role. Unfortunately, the intricate dynamics and interactions between AI and clinicians remain largely unexplored, which hinders AI from being translated into medical practice. To address this gap, we have curated a groundbreaking database called AI.vs.Clinician. This database is the first of its kind for studying the interactions between AI and clinicians. It derives from 7,500 collaborative diagnosis records on a life-threatening medical emergency -- sepsis -- from 14 medical centers across China. For the patient cohorts well-chosen from MIMIC databases, the AI-related information comprises the model property, feature input, diagnosis decision, and inferred probabilities of sepsis onset presently and within the next three hours. The clinician-related information includes the viewed examination data and sequence, viewed time, preliminary and final diagnosis decisions with or without AI assistance, and recommended treatment.
   Submitted 28 July, 2024; v1 submitted 11 June, 2024; originally announced June 2024.
   Comments: 12 pages

9. arXiv:2405.16417 [pdf, other]
   CRoFT: Robust Fine-Tuning with Concurrent Optimization for OOD Generalization and Open-Set OOD Detection
   Authors: Lin Zhu, Yifeng Yang, Qinying Gu, Xinbing Wang, Chenghu Zhou, Nanyang Ye
   Subjects: Computer Vision and Pattern Recognition (cs.CV)
   Abstract: Recent vision-language pre-trained models (VL-PTMs) have shown remarkable success in open-vocabulary tasks. However, downstream use cases often involve further fine-tuning of VL-PTMs, which may distort their general knowledge and impair their ability to handle distribution shifts. In real-world scenarios, machine learning systems inevitably encounter both covariate shifts (e.g., changes in image styles) and semantic shifts (e.g., test-time unseen classes). This highlights the importance of enhancing out-of-distribution (OOD) generalization under covariate shifts while simultaneously detecting semantic-shifted unseen classes. A critical but underexplored question thus arises: how can we improve VL-PTMs' generalization ability to closed-set OOD data while effectively detecting open-set unseen classes during fine-tuning? In this paper, we propose a novel objective function for OOD detection that also serves to improve OOD generalization. We show that minimizing the gradient magnitude of energy scores on training data leads to domain-consistent Hessians of the classification loss, a strong indicator of OOD generalization revealed by theoretical analysis. Based on this finding, we develop a unified fine-tuning framework that allows for concurrent optimization of both tasks. Extensive experiments demonstrate the superiority of our method. The code is available at https://github.com/LinLLLL/CRoFT.
   Submitted 25 May, 2024; originally announced May 2024.
   Comments: Accepted by ICML2024
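
The "gradient magnitude of energy scores" objective can be sketched as a differentiable penalty on the parameter gradient of the mean energy score of training inputs. Taking the gradient with respect to model parameters is an assumption here, and the sketch omits the rest of the CRoFT framework.

    import torch
    import torch.nn.functional as F

    def energy_gradient_penalty(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
        """Squared norm of d(mean energy)/d(parameters), kept differentiable for training."""
        logits = model(x)
        energy = -torch.logsumexp(logits, dim=1).mean()          # energy score E(x) = -logsumexp(f(x))
        grads = torch.autograd.grad(energy,
                                    [p for p in model.parameters() if p.requires_grad],
                                    create_graph=True)
        return sum((g ** 2).sum() for g in grads)

    # Toy usage: cross-entropy plus the energy-gradient penalty on a small classifier.
    torch.manual_seed(0)
    model = torch.nn.Sequential(torch.nn.Linear(8, 32), torch.nn.ReLU(), torch.nn.Linear(32, 4))
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    x, y = torch.randn(64, 8), torch.randint(0, 4, (64,))
    for _ in range(10):
        opt.zero_grad()
        loss = F.cross_entropy(model(x), y) + 0.1 * energy_gradient_penalty(model, x)
        loss.backward()
        opt.step()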

10. arXiv:2405.08816 [pdf, other]
   The RoboDrive Challenge: Drive Anytime Anywhere in Any Condition
   Authors: Lingdong Kong, Shaoyuan Xie, Hanjiang Hu, Yaru Niu, Wei Tsang Ooi, Benoit R. Cottereau, Lai Xing Ng, Yuexin Ma, Wenwei Zhang, Liang Pan, Kai Chen, Ziwei Liu, Weichao Qiu, Wei Zhang, Xu Cao, Hao Lu, Ying-Cong Chen, Caixin Kang, Xinning Zhou, Chengyang Ying, Wentao Shang, Xingxing Wei, Yinpeng Dong, Bo Yang, Shengyin Jiang, et al. (66 additional authors not shown)
   Subjects: Computer Vision and Pattern Recognition (cs.CV); Robotics (cs.RO)
   Abstract: In the realm of autonomous driving, robust perception under out-of-distribution conditions is paramount for the safe deployment of vehicles. Challenges such as adverse weather, sensor malfunctions, and environmental unpredictability can severely impact the performance of autonomous systems. The 2024 RoboDrive Challenge was crafted to propel the development of driving perception technologies that can withstand and adapt to these real-world variabilities. Focusing on four pivotal tasks -- BEV detection, map segmentation, semantic occupancy prediction, and multi-view depth estimation -- the competition laid down a gauntlet to innovate and enhance system resilience against typical and atypical disturbances. This year's challenge consisted of five distinct tracks and attracted 140 registered teams from 93 institutes across 11 countries, resulting in nearly one thousand submissions evaluated through our servers. The competition culminated in 15 top-performing solutions, which introduced a range of innovative approaches including advanced data augmentation, multi-sensor fusion, self-supervised learning for error correction, and new algorithmic strategies to enhance sensor robustness. These contributions significantly advanced the state of the art, particularly in handling sensor inconsistencies and environmental variability. Participants, through collaborative efforts, pushed the boundaries of current technologies, showcasing their potential in real-world scenarios. Extensive evaluations and analyses provided insights into the effectiveness of these solutions, highlighting key trends and successful strategies for improving the resilience of driving perception systems. This challenge has set a new benchmark in the field, providing a rich repository of techniques expected to guide future research in this field.
   Submitted 29 May, 2024; v1 submitted 14 May, 2024; originally announced May 2024.
   Comments: ICRA 2024; 32 pages, 24 figures, 5 tables; Code at https://robodrive-24.github.io/
This module eliminates the necessity for depth estimation and helps subsequent modules achieve real-time performance. We further design a non-negative factorization-based encoder to extract mutually consistent semantic features between point clouds and images. This encoder yields more distinctive global descriptors for retrieval. Experimental results on the KITTI dataset show that our proposed methods achieve state-of-the-art performance while running in real time. Additional evaluation on the HAOMO dataset covering a 17 km trajectory further shows the practical generalization capabilities. We have released the implementation of our methods as open source at: https://github.com/haomo-ai/ModaLink.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18762v1-abstract-full').style.display = 'none'; document.getElementById('2403.18762v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 11 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15712">arXiv:2403.15712</a> <span> [<a href="https://arxiv.org/pdf/2403.15712">pdf</a>, <a href="https://arxiv.org/format/2403.15712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LRA.2024.3379865">10.1109/LRA.2024.3379865 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PNAS-MOT: Multi-Modal Object Tracking with Pareto Neural Architecture Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+C">Chensheng Peng</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Z">Zhaoyu Zeng</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jinling Gao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jundong Zhou</a>, <a href="/search/cs?searchtype=author&query=Tomizuka%2C+M">Masayoshi Tomizuka</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinbing Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chenghu Zhou</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15712v1-abstract-short" style="display: inline;"> Multiple object tracking is a critical task in autonomous driving. Existing works primarily focus on the heuristic design of neural networks to obtain high accuracy. 
As tracking accuracy improves, however, neural networks become increasingly complex, posing challenges for their practical application in real driving scenarios due to the high level of latency. In this paper, we explore the use of th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15712v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15712v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15712v1-abstract-full" style="display: none;"> Multiple object tracking is a critical task in autonomous driving. Existing works primarily focus on the heuristic design of neural networks to obtain high accuracy. As tracking accuracy improves, however, neural networks become increasingly complex, posing challenges for their practical application in real driving scenarios due to the high level of latency. In this paper, we explore the use of the neural architecture search (NAS) methods to search for efficient architectures for tracking, aiming for low real-time latency while maintaining relatively high accuracy. Another challenge for object tracking is the unreliability of a single sensor, therefore, we propose a multi-modal framework to improve the robustness. Experiments demonstrate that our algorithm can run on edge devices within lower latency constraints, thus greatly reducing the computational requirements for multi-modal object tracking while keeping lower latency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15712v1-abstract-full').style.display = 'none'; document.getElementById('2403.15712v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Robotics and Automation Letters 2024. 
Code is available at https://github.com/PholyPeng/PNAS-MOT</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Robotics and Automation Letters, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07342">arXiv:2403.07342</a> <span> [<a href="https://arxiv.org/pdf/2403.07342">pdf</a>, <a href="https://arxiv.org/format/2403.07342">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rethinking ASTE: A Minimalist Tagging Scheme Alongside Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qiao Sun</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+L">Liujia Yang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+M">Minghao Ma</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Q">Qinying Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07342v2-abstract-short" style="display: inline;"> Aspect Sentiment Triplet Extraction (ASTE) is a burgeoning subtask of fine-grained sentiment analysis, aiming to extract structured sentiment triplets from unstructured textual data. Existing approaches to ASTE often complicate the task with additional structures or external data. In this research, we propose a novel tagging scheme and employ a contrastive learning approach to mitigate these chall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07342v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07342v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07342v2-abstract-full" style="display: none;"> Aspect Sentiment Triplet Extraction (ASTE) is a burgeoning subtask of fine-grained sentiment analysis, aiming to extract structured sentiment triplets from unstructured textual data. Existing approaches to ASTE often complicate the task with additional structures or external data. In this research, we propose a novel tagging scheme and employ a contrastive learning approach to mitigate these challenges. The proposed approach demonstrates comparable or superior performance in comparison to state-of-the-art techniques, while featuring a more compact design and reduced computational overhead. Notably, even in the era of Large Language Models (LLMs), our method exhibits superior efficacy compared to GPT 3.5 and GPT 4 in a few-shot learning scenarios. This study also provides valuable insights for the advancement of ASTE techniques within the paradigm of large language models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07342v2-abstract-full').style.display = 'none'; document.getElementById('2403.07342v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03635">arXiv:2403.03635</a> <span> [<a href="https://arxiv.org/pdf/2403.03635">pdf</a>, <a href="https://arxiv.org/format/2403.03635">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Processing Load Allocation of On-Board Multi-User Detection for Payload-Constrained Satellite Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Miao%2C+S">Sirui Miao</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Neng Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peisen Wang</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+Q">Qiaolin Ouyang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03635v1-abstract-short" style="display: inline;"> The rapid advance of mega-constellation facilitates the booming of direct-to-satellite massive access, where multi-user detection is critical to alleviate the induced inter-user interference. While centralized implementation of on-board detection induces unaffordable complexity for a single satellite, this paper proposes to allocate the processing load among cooperative satellites for finest explo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03635v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03635v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03635v1-abstract-full" style="display: none;"> The rapid advance of mega-constellation facilitates the booming of direct-to-satellite massive access, where multi-user detection is critical to alleviate the induced inter-user interference. While centralized implementation of on-board detection induces unaffordable complexity for a single satellite, this paper proposes to allocate the processing load among cooperative satellites for finest exploitation of distributed processing power. Observing the inherent disparities among users, we first excavate the closed-form trade-offs between achievable sum-rate and the processing load corresponding to the satellite-user matchings, which leads to a system sum-rate maximization problem under stringent payload constraints. To address the non-trivial integer matching, we develop a quadratic transformation to the original problem, and prove it an equivalent conversion. 
The problem is further simplified into a series of subproblems employing successive lower bound approximation which obtains polynomial-time complexity and converges within a few iterations. Numerical results show remarkably complexity reduction compared with centralized processing, as well as around 20\% sum-rate gain compared with other allocation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03635v1-abstract-full').style.display = 'none'; document.getElementById('2403.03635v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.02576">arXiv:2403.02576</a> <span> [<a href="https://arxiv.org/pdf/2403.02576">pdf</a>, <a href="https://arxiv.org/format/2403.02576">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> AceMap: Knowledge Discovery through Academic Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinbing Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+L">Luoyi Fu</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+X">Xiaoying Gan</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Ying Wen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+G">Guanjie Zheng</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+J">Jiaxin Ding</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+L">Liyao Xiang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Meng Jin</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+S">Shiyu Liang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+B">Bin Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haiwen Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yi Xu</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+C">Cheng Deng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shao Zhang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+H">Huquan Kang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xingli Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qi Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhixin Guo</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+J">Jiexing Qi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pan Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yuyang Ren</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+L">Lyuwen Wu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jungang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jianping Zhou</a> , et al. 
(1 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.02576v2-abstract-short" style="display: inline;"> The exponential growth of scientific literature requires effective management and extraction of valuable insights. While existing scientific search engines excel at delivering search results based on relational databases, they often neglect the analysis of collaborations between scientific entities and the evolution of ideas, as well as the in-depth analysis of content within scientific publicatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02576v2-abstract-full').style.display = 'inline'; document.getElementById('2403.02576v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.02576v2-abstract-full" style="display: none;"> The exponential growth of scientific literature requires effective management and extraction of valuable insights. While existing scientific search engines excel at delivering search results based on relational databases, they often neglect the analysis of collaborations between scientific entities and the evolution of ideas, as well as the in-depth analysis of content within scientific publications. The representation of heterogeneous graphs and the effective measurement, analysis, and mining of such graphs pose significant challenges. To address these challenges, we present AceMap, an academic system designed for knowledge discovery through academic graph. We present advanced database construction techniques to build the comprehensive AceMap database with large-scale academic entities that contain rich visual, textual, and numerical information. AceMap also employs innovative visualization, quantification, and analysis methods to explore associations and logical relationships among academic entities. AceMap introduces large-scale academic network visualization techniques centered on nebular graphs, providing a comprehensive view of academic networks from multiple perspectives. In addition, AceMap proposes a unified metric based on structural entropy to quantitatively measure the knowledge content of different academic entities. Moreover, AceMap provides advanced analysis capabilities, including tracing the evolution of academic ideas through citation relationships and concept co-occurrence, and generating concise summaries informed by this evolutionary process. In addition, AceMap uses machine reading methods to generate potential new ideas at the intersection of different fields. Exploring the integration of large language models and knowledge graphs is a promising direction for future research in idea evolution. Please visit \url{https://www.acemap.info} for further exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02576v2-abstract-full').style.display = 'none'; document.getElementById('2403.02576v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
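<p class="is-size-7">The AceMap entry above (arXiv:2403.02576) mentions "a unified metric based on structural entropy" without giving details. Purely as illustrative background, and not the system's actual metric, the sketch below computes the one-dimensional structural entropy of an undirected graph, i.e. the Shannon entropy of its degree distribution; hierarchical (encoding-tree) variants are not covered, and all names are invented for the example.</p>
<pre><code class="language-python">
import numpy as np

def one_dim_structural_entropy(adjacency):
    """One-dimensional structural entropy of an undirected graph:
    the Shannon entropy of the degree distribution d_i / (2m)."""
    degrees = np.asarray(adjacency).sum(axis=1).astype(float)
    total = degrees.sum()                  # equals 2m for an undirected graph
    p = degrees[degrees > 0] / total
    return float(-(p * np.log2(p)).sum())

# Toy graph: a triangle plus a pendant node.
A = np.array([[0, 1, 1, 0],
              [1, 0, 1, 0],
              [1, 1, 0, 1],
              [0, 0, 1, 0]])
print(round(one_dim_structural_entropy(A), 3))
</code></pre>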
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Technical Report for AceMap (https://www.acemap.info)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05819">arXiv:2402.05819</a> <span> [<a href="https://arxiv.org/pdf/2402.05819">pdf</a>, <a href="https://arxiv.org/format/2402.05819">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Integrating Self-supervised Speech Model with Pseudo Word-level Targets from Visually-grounded Speech Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fang%2C+H">Hung-Chieh Fang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nai-Xuan Ye</a>, <a href="/search/cs?searchtype=author&query=Shih%2C+Y">Yi-Jen Shih</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+P">Puyuan Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hsuan-Fu Wang</a>, <a href="/search/cs?searchtype=author&query=Berry%2C+L">Layne Berry</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a>, <a href="/search/cs?searchtype=author&query=Harwath%2C+D">David Harwath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05819v1-abstract-short" style="display: inline;"> Recent advances in self-supervised speech models have shown significant improvement in many downstream tasks. However, these models predominantly centered on frame-level training objectives, which can fall short in spoken language understanding tasks that require semantic comprehension. Existing works often rely on additional speech-text data as intermediate targets, which is costly in the real-wo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05819v1-abstract-full').style.display = 'inline'; document.getElementById('2402.05819v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05819v1-abstract-full" style="display: none;"> Recent advances in self-supervised speech models have shown significant improvement in many downstream tasks. However, these models predominantly centered on frame-level training objectives, which can fall short in spoken language understanding tasks that require semantic comprehension. Existing works often rely on additional speech-text data as intermediate targets, which is costly in the real-world setting. To address this challenge, we propose Pseudo-Word HuBERT (PW-HuBERT), a framework that integrates pseudo word-level targets into the training process, where the targets are derived from a visually-ground speech model, notably eliminating the need for speech-text paired data. Our experimental results on four spoken language understanding (SLU) benchmarks suggest the superiority of our model in capturing semantic information. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05819v1-abstract-full').style.display = 'none'; document.getElementById('2402.05819v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICASSP 2024 workshop on Self-supervision in Audio, Speech, and Beyond (SASB)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04672">arXiv:2402.04672</a> <span> [<a href="https://arxiv.org/pdf/2402.04672">pdf</a>, <a href="https://arxiv.org/format/2402.04672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> G-NAS: Generalizable Neural Architecture Search for Single Domain Generalization Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fan Wu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jinling Gao</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinbing Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chenghu Zhou</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04672v1-abstract-short" style="display: inline;"> In this paper, we focus on a realistic yet challenging task, Single Domain Generalization Object Detection (S-DGOD), where only one source domain's data can be used for training object detectors, but have to generalize multiple distinct target domains. In S-DGOD, both high-capacity fitting and generalization abilities are needed due to the task's complexity. Differentiable Neural Architecture Sear… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04672v1-abstract-full').style.display = 'inline'; document.getElementById('2402.04672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04672v1-abstract-full" style="display: none;"> In this paper, we focus on a realistic yet challenging task, Single Domain Generalization Object Detection (S-DGOD), where only one source domain's data can be used for training object detectors, but have to generalize multiple distinct target domains. In S-DGOD, both high-capacity fitting and generalization abilities are needed due to the task's complexity. Differentiable Neural Architecture Search (NAS) is known for its high capacity for complex data fitting and we propose to leverage Differentiable NAS to solve S-DGOD. 
However, it may confront severe over-fitting issues due to the feature imbalance phenomenon, where parameters optimized by gradient descent are biased to learn from the easy-to-learn features, which are usually non-causal and spuriously correlated to ground truth labels, such as the features of background in object detection data. Consequently, this leads to serious performance degradation, especially in generalizing to unseen target domains with huge domain gaps between the source domain and target domains. To address this issue, we propose the Generalizable loss (G-loss), which is an OoD-aware objective, preventing NAS from over-fitting by using gradient descent to optimize parameters not only on a subset of easy-to-learn features but also the remaining predictive features for generalization, and the overall framework is named G-NAS. Experimental results on the S-DGOD urban-scene datasets demonstrate that the proposed G-NAS achieves SOTA performance compared to baseline methods. Codes are available at https://github.com/wufan-cse/G-NAS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04672v1-abstract-full').style.display = 'none'; document.getElementById('2402.04672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.12937">arXiv:2312.12937</a> <span> [<a href="https://arxiv.org/pdf/2312.12937">pdf</a>, <a href="https://arxiv.org/format/2312.12937">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Robust Loss Functions for Training Decision Trees with Noisy Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wilton%2C+J">Jonathan Wilton</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.12937v2-abstract-short" style="display: inline;"> We consider training decision trees using noisily labeled data, focusing on loss functions that can lead to robust learning algorithms. Our contributions are threefold. First, we offer novel theoretical insights on the robustness of many existing loss functions in the context of decision tree learning. 
We show that some of the losses belong to a class of what we call conservative losses, and the c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12937v2-abstract-full').style.display = 'inline'; document.getElementById('2312.12937v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.12937v2-abstract-full" style="display: none;"> We consider training decision trees using noisily labeled data, focusing on loss functions that can lead to robust learning algorithms. Our contributions are threefold. First, we offer novel theoretical insights on the robustness of many existing loss functions in the context of decision tree learning. We show that some of the losses belong to a class of what we call conservative losses, and the conservative losses lead to an early stopping behavior during training and noise-tolerant predictions during testing. Second, we introduce a framework for constructing robust loss functions, called distribution losses. These losses apply percentile-based penalties based on an assumed margin distribution, and they naturally allow adapting to different noise rates via a robustness parameter. In particular, we introduce a new loss called the negative exponential loss, which leads to an efficient greedy impurity-reduction learning algorithm. Lastly, our experiments on multiple datasets and noise settings validate our theoretical insight and the effectiveness of our adaptive negative exponential loss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12937v2-abstract-full').style.display = 'none'; document.getElementById('2312.12937v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
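<p class="is-size-7">The abstract of arXiv:2312.12937 above describes training decision trees by greedy impurity reduction with robust margin losses. The sketch below shows only the generic mechanism as background: a single-split search where the node risk is computed with a pluggable margin loss. The classical exponential loss is used as a placeholder; the paper's conservative, distribution, and negative exponential losses are not reproduced here, and all function names and data are illustrative.</p>
<pre><code class="language-python">
import numpy as np

def node_risk(y, loss_fn):
    """Empirical risk of a node that predicts its own mean label score.
    y holds labels in {-1, +1}; loss_fn maps margins to penalties."""
    if y.size == 0:
        return 0.0
    score = y.mean()              # constant prediction for the node
    return float(loss_fn(y * score).mean())

def best_split(X, y, loss_fn):
    """Greedy, impurity-reduction style search for one axis-aligned split."""
    n, d = X.shape
    best = (None, None, np.inf)   # (feature index, threshold, weighted risk)
    for j in range(d):
        for t in np.unique(X[:, j]):
            right = X[:, j] > t
            risk = (right.mean() * node_risk(y[right], loss_fn)
                    + (~right).mean() * node_risk(y[~right], loss_fn))
            if best[2] > risk:
                best = (j, float(t), float(risk))
    return best

# Placeholder margin loss: the classical exponential loss exp(-margin).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = np.where(X[:, 0] + 0.3 * rng.normal(size=200) > 0, 1.0, -1.0)
print(best_split(X, y, lambda m: np.exp(-m)))
</code></pre>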
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI Conference on Artificial Intelligence 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11318">arXiv:2312.11318</a> <span> [<a href="https://arxiv.org/pdf/2312.11318">pdf</a>, <a href="https://arxiv.org/format/2312.11318">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Domain Invariant Learning for Gaussian Processes and Bayesian Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xilong Zhao</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+S">Siyuan Bian</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yaoyun Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Q">Qinying Gu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinbing Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chenghu Zhou</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11318v1-abstract-short" style="display: inline;"> Out-of-distribution (OOD) generalization has long been a challenging problem that remains largely unsolved. Gaussian processes (GP), as popular probabilistic model classes, especially in the small data regime, presume strong OOD generalization abilities. Surprisingly, their OOD generalization abilities have been under-explored before compared with other lines of GP research. In this paper, we iden… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11318v1-abstract-full').style.display = 'inline'; document.getElementById('2312.11318v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11318v1-abstract-full" style="display: none;"> Out-of-distribution (OOD) generalization has long been a challenging problem that remains largely unsolved. Gaussian processes (GP), as popular probabilistic model classes, especially in the small data regime, presume strong OOD generalization abilities. Surprisingly, their OOD generalization abilities have been under-explored before compared with other lines of GP research. In this paper, we identify that GP is not free from the problem and propose a domain invariant learning algorithm for Gaussian processes (DIL-GP) with a min-max optimization on the likelihood. DIL-GP discovers the heterogeneity in the data and forces invariance across partitioned subsets of data. We further extend the DIL-GP to improve Bayesian optimization's adaptability on changing environments. Numerical experiments demonstrate the superiority of DIL-GP for predictions on several synthetic and real-world datasets. We further demonstrate the effectiveness of the DIL-GP Bayesian optimization method on a PID parameters tuning experiment for a quadrotor. The full version and source code are available at: https://github.com/Billzxl/DIL-GP. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11318v1-abstract-full').style.display = 'none'; document.getElementById('2312.11318v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to The 38th Annual AAAI Conference on Artificial Intelligence (AAAI 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12078">arXiv:2311.12078</a> <span> [<a href="https://arxiv.org/pdf/2311.12078">pdf</a>, <a href="https://arxiv.org/format/2311.12078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fast Controllable Diffusion Models for Undersampled MRI Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+W">Wei Jiang</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hongfu Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12078v3-abstract-short" style="display: inline;"> Supervised deep learning methods have shown promise in undersampled Magnetic Resonance Imaging (MRI) reconstruction, but their requirement for paired data limits their generalizability to the diverse MRI acquisition parameters. Recently, unsupervised controllable generative diffusion models have been applied to undersampled MRI reconstruction, without paired data or model retraining for different… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12078v3-abstract-full').style.display = 'inline'; document.getElementById('2311.12078v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12078v3-abstract-full" style="display: none;"> Supervised deep learning methods have shown promise in undersampled Magnetic Resonance Imaging (MRI) reconstruction, but their requirement for paired data limits their generalizability to the diverse MRI acquisition parameters. Recently, unsupervised controllable generative diffusion models have been applied to undersampled MRI reconstruction, without paired data or model retraining for different MRI acquisitions. However, diffusion models are generally slow in sampling and state-of-the-art acceleration techniques can lead to sub-optimal results when directly applied to the controllable generation process. 
This study introduces a new algorithm called Predictor-Projector-Noisor (PPN), which enhances and accelerates controllable generation of diffusion models for undersampled MRI reconstruction. Our results demonstrate that PPN produces high-fidelity MR images that conform to undersampled k-space measurements with significantly shorter reconstruction time than other controllable sampling methods. In addition, the unsupervised PPN accelerated diffusion models are adaptable to different MRI acquisition parameters, making them more practical for clinical use than supervised learning techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12078v3-abstract-full').style.display = 'none'; document.getElementById('2311.12078v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.08295">arXiv:2305.08295</a> <span> [<a href="https://arxiv.org/pdf/2305.08295">pdf</a>, <a href="https://arxiv.org/format/2305.08295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CLImage: Human-Annotated Datasets for Complementary-Label Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hsiu-Hsuan Wang</a>, <a href="/search/cs?searchtype=author&query=Mai%2C+T">Tan-Ha Mai</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nai-Xuan Ye</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wei-I Lin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hsuan-Tien Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.08295v3-abstract-short" style="display: inline;"> Complementary-label learning (CLL) is a weakly-supervised learning paradigm that aims to train a multi-class classifier using only complementary labels, which indicate classes to which an instance does not belong. Despite numerous algorithmic proposals for CLL, their practical applicability remains unverified for two reasons. Firstly, these algorithms often rely on assumptions about the generation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08295v3-abstract-full').style.display = 'inline'; document.getElementById('2305.08295v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.08295v3-abstract-full" style="display: none;"> Complementary-label learning (CLL) is a weakly-supervised learning paradigm that aims to train a multi-class classifier using only complementary labels, which indicate classes to which an instance does not belong. Despite numerous algorithmic proposals for CLL, their practical applicability remains unverified for two reasons. 
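<p class="is-size-7">The PPN entry above (arXiv:2311.12078) concerns reconstructions that "conform to undersampled k-space measurements". As generic background rather than the authors' PPN algorithm, the sketch below shows a hard data-consistency projection: the current image estimate's k-space values are replaced by the measured ones wherever samples were acquired. The sampling mask, image size, and toy phantom are invented for the example.</p>
<pre><code class="language-python">
import numpy as np

def data_consistency(image_estimate, measured_kspace, mask):
    """Hard data-consistency step: keep the estimate's k-space values only
    where no measurement exists, and restore measured values elsewhere."""
    k_est = np.fft.fft2(image_estimate)
    k_mixed = np.where(mask.astype(bool), measured_kspace, k_est)
    return np.fft.ifft2(k_mixed)

# Toy example: a square phantom, ~30% random k-space sampling, a noisy guess.
rng = np.random.default_rng(0)
truth = np.zeros((64, 64)); truth[20:40, 24:44] = 1.0
mask = (rng.uniform(size=truth.shape) > 0.7).astype(float)
measured = mask * np.fft.fft2(truth)
guess = np.real(np.fft.ifft2(measured)) + 0.1 * rng.normal(size=truth.shape)
refined = data_consistency(guess, measured, mask)
print(float(np.abs(guess - truth).mean()), float(np.abs(refined - truth).mean()))
</code></pre>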
Firstly, these algorithms often rely on assumptions about the generation of complementary labels, and it is not clear how far the assumptions are from reality. Secondly, their evaluation has been limited to synthetic datasets. To gain insights into the real-world performance of CLL algorithms, we developed a protocol to collect complementary labels from human annotators. Our efforts resulted in the creation of four datasets: CLCIFAR10, CLCIFAR20, CLMicroImageNet10, and CLMicroImageNet20, derived from well-known classification datasets CIFAR10, CIFAR100, and TinyImageNet200. These datasets represent the very first real-world CLL datasets. Through extensive benchmark experiments, we discovered a notable decrease in performance when transitioning from synthetic datasets to real-world datasets. We investigated the key factors contributing to the decrease with a thorough dataset-level ablation study. Our analyses highlight annotation noise as the most influential factor in the real-world datasets. In addition, we discover that the biased-nature of human-annotated complementary labels and the difficulty to validate with only complementary labels are two outstanding barriers to practical CLL. These findings suggest that the community focus more research efforts on developing CLL algorithms and validation schemes that are robust to noisy and biased complementary-label distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08295v3-abstract-full').style.display = 'none'; document.getElementById('2305.08295v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.08049">arXiv:2305.08049</a> <span> [<a href="https://arxiv.org/pdf/2305.08049">pdf</a>, <a href="https://arxiv.org/format/2305.08049">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Surprisingly Simple Continuous-Action POMDP Solver: Lazy Cross-Entropy Search Over Policy Trees </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoerger%2C+M">Marcus Hoerger</a>, <a href="/search/cs?searchtype=author&query=Kurniawati%2C+H">Hanna Kurniawati</a>, <a href="/search/cs?searchtype=author&query=Kroese%2C+D">Dirk Kroese</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.08049v2-abstract-short" style="display: inline;"> The Partially Observable Markov Decision Process (POMDP) provides a principled framework for decision making in stochastic partially observable environments. However, computing good solutions for problems with continuous action spaces remains challenging. To ease this challenge, we propose a simple online POMDP solver, called Lazy Cross-Entropy Search Over Policy Trees (LCEOPT). 
At each planning s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08049v2-abstract-full').style.display = 'inline'; document.getElementById('2305.08049v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.08049v2-abstract-full" style="display: none;"> The Partially Observable Markov Decision Process (POMDP) provides a principled framework for decision making in stochastic partially observable environments. However, computing good solutions for problems with continuous action spaces remains challenging. To ease this challenge, we propose a simple online POMDP solver, called Lazy Cross-Entropy Search Over Policy Trees (LCEOPT). At each planning step, our method uses a novel lazy Cross-Entropy method to search the space of policy trees, which provide a simple policy representation. Specifically, we maintain a distribution on promising finite-horizon policy trees. The distribution is iteratively updated by sampling policies, evaluating them via Monte Carlo simulation, and refitting them to the top-performing ones. Our method is lazy in the sense that it exploits the policy tree representation to avoid redundant computations in policy sampling, evaluation, and distribution update. This leads to computational savings of up to two orders of magnitude. Our LCEOPT is surprisingly simple as compared to existing state-of-the-art methods, yet empirically outperforms them on several continuous-action POMDP problems, particularly for problems with higher-dimensional action spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08049v2-abstract-full').style.display = 'none'; document.getElementById('2305.08049v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
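<p class="is-size-7">The LCEOPT abstract above (arXiv:2305.08049) describes a cross-entropy search loop: sample candidates from a maintained distribution, evaluate them by Monte Carlo simulation, and refit the distribution to the top performers. The sketch below is the generic Gaussian cross-entropy method over a real-valued parameter vector, not the paper's lazy search over policy trees; the population size, elite fraction, and toy objective are arbitrary.</p>
<pre><code class="language-python">
import numpy as np

def cross_entropy_search(evaluate, dim, iterations=30, population=64, elite_frac=0.125, seed=0):
    """Generic cross-entropy method: sample candidates from a Gaussian,
    evaluate them, and refit the Gaussian to the top-performing (elite) ones."""
    rng = np.random.default_rng(seed)
    mean = np.zeros(dim)
    std = np.ones(dim)
    n_elite = max(1, int(population * elite_frac))
    for _ in range(iterations):
        samples = rng.normal(mean, std, size=(population, dim))
        scores = np.array([evaluate(s) for s in samples])
        elites = samples[np.argsort(scores)[-n_elite:]]   # keep the best
        mean, std = elites.mean(axis=0), elites.std(axis=0) + 1e-3
    return mean

# Toy objective: higher is better, maximized at x = (1, 2, 3).
target = np.array([1.0, 2.0, 3.0])
best = cross_entropy_search(lambda x: -np.sum((x - target) ** 2), dim=3)
print(np.round(best, 2))
</code></pre>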
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in the proceedings of The 38th Annual AAAI Conference on Artificial Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.10439">arXiv:2302.10439</a> <span> [<a href="https://arxiv.org/pdf/2302.10439">pdf</a>, <a href="https://arxiv.org/format/2302.10439">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Discretization using Voronoi Trees for Continuous POMDPs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoerger%2C+M">Marcus Hoerger</a>, <a href="/search/cs?searchtype=author&query=Kurniawati%2C+H">Hanna Kurniawati</a>, <a href="/search/cs?searchtype=author&query=Kroese%2C+D">Dirk Kroese</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.10439v1-abstract-short" style="display: inline;"> Solving continuous Partially Observable Markov Decision Processes (POMDPs) is challenging, particularly for high-dimensional continuous action spaces. To alleviate this difficulty, we propose a new sampling-based online POMDP solver, called Adaptive Discretization using Voronoi Trees (ADVT). It uses Monte Carlo Tree Search in combination with an adaptive discretization of the action space as well… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10439v1-abstract-full').style.display = 'inline'; document.getElementById('2302.10439v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.10439v1-abstract-full" style="display: none;"> Solving continuous Partially Observable Markov Decision Processes (POMDPs) is challenging, particularly for high-dimensional continuous action spaces. To alleviate this difficulty, we propose a new sampling-based online POMDP solver, called Adaptive Discretization using Voronoi Trees (ADVT). It uses Monte Carlo Tree Search in combination with an adaptive discretization of the action space as well as optimistic optimization to efficiently sample high-dimensional continuous action spaces and compute the best action to perform. Specifically, we adaptively discretize the action space for each sampled belief using a hierarchical partition called Voronoi tree, which is a Binary Space Partitioning that implicitly maintains the partition of a cell as the Voronoi diagram of two points sampled from the cell. ADVT uses the estimated diameters of the cells to form an upper-confidence bound on the action value function within the cell, guiding the Monte Carlo Tree Search expansion and further discretization of the action space. This enables ADVT to better exploit local information with respect to the action value function, allowing faster identification of the most promising regions in the action space, compared to existing solvers. 
Voronoi trees keep the cost of partitioning and estimating the diameter of each cell low, even in high-dimensional spaces where many sampled points are required to cover the space well. ADVT additionally handles continuous observation spaces, by adopting an observation progressive widening strategy, along with a weighted particle representation of beliefs. Experimental results indicate that ADVT scales substantially better to high-dimensional continuous action spaces, compared to state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10439v1-abstract-full').style.display = 'none'; document.getElementById('2302.10439v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to The International Journal of Robotics Research (IJRR). arXiv admin note: substantial text overlap with arXiv:2209.05733</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.16318">arXiv:2210.16318</a> <span> [<a href="https://arxiv.org/pdf/2210.16318">pdf</a>, <a href="https://arxiv.org/format/2210.16318">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Filter and evolve: progressive pseudo label refining for semi-supervised automatic speech recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zezhong Jin</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+D">Dading Zhong</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiao Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhaoyi Liu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Naipeng Ye</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingcheng Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.16318v1-abstract-short" style="display: inline;"> Fine tuning self supervised pretrained models using pseudo labels can effectively improve speech recognition performance. But, low quality pseudo labels can misguide decision boundaries and degrade performance. We propose a simple yet effective strategy to filter low quality pseudo labels to alleviate this problem. 
Specifically, pseudo-labels are produced over the entire training set and filtered… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16318v1-abstract-full').style.display = 'inline'; document.getElementById('2210.16318v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.16318v1-abstract-full" style="display: none;"> Fine tuning self supervised pretrained models using pseudo labels can effectively improve speech recognition performance. But, low quality pseudo labels can misguide decision boundaries and degrade performance. We propose a simple yet effective strategy to filter low quality pseudo labels to alleviate this problem. Specifically, pseudo-labels are produced over the entire training set and filtered via average probability scores calculated from the model output. Subsequently, an optimal percentage of utterances with high probability scores are considered reliable training data with trustworthy labels. The model is iteratively updated to correct the unreliable pseudo labels to minimize the effect of noisy labels. The process above is repeated until unreliable pseudo labels have been adequately corrected. Extensive experiments on LibriSpeech show that these filtered samples enable the refined model to yield more correct predictions, leading to better ASR performance under various experimental settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16318v1-abstract-full').style.display = 'none'; document.getElementById('2210.16318v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.08461">arXiv:2210.08461</a> <span> [<a href="https://arxiv.org/pdf/2210.08461">pdf</a>, <a href="https://arxiv.org/format/2210.08461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Positive-Unlabeled Learning using Random Forests via Recursive Greedy Risk Minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wilton%2C+J">Jonathan Wilton</a>, <a href="/search/cs?searchtype=author&query=Koay%2C+A+M+Y">Abigail M. Y. Koay</a>, <a href="/search/cs?searchtype=author&query=Ko%2C+R+K+L">Ryan K. L. Ko</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Miao Xu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.08461v1-abstract-short" style="display: inline;"> The need to learn from positive and unlabeled data, or PU learning, arises in many applications and has attracted increasing interest.
While random forests are known to perform well on many tasks with positive and negative data, recent PU algorithms are generally based on deep neural networks, and the potential of tree-based PU learning is under-explored. In this paper, we propose new random fores… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.08461v1-abstract-full').style.display = 'inline'; document.getElementById('2210.08461v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.08461v1-abstract-full" style="display: none;"> The need to learn from positive and unlabeled data, or PU learning, arises in many applications and has attracted increasing interest. While random forests are known to perform well on many tasks with positive and negative data, recent PU algorithms are generally based on deep neural networks, and the potential of tree-based PU learning is under-explored. In this paper, we propose new random forest algorithms for PU-learning. Key to our approach is a new interpretation of decision tree algorithms for positive and negative data as \emph{recursive greedy risk minimization algorithms}. We extend this perspective to the PU setting to develop new decision tree learning algorithms that directly minimizes PU-data based estimators for the expected risk. This allows us to develop an efficient PU random forest algorithm, PU extra trees. Our approach features three desirable properties: it is robust to the choice of the loss function in the sense that various loss functions lead to the same decision trees; it requires little hyperparameter tuning as compared to neural network based PU learning; it supports a feature importance that directly measures a feature's contribution to risk minimization. Our algorithms demonstrate strong performance on several datasets. Our code is available at \url{https://github.com/puetpaper/PUExtraTrees}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.08461v1-abstract-full').style.display = 'none'; document.getElementById('2210.08461v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
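<p class="is-size-7">The abstract above casts tree growing as recursive greedy minimization of a PU-data estimate of the expected risk. The snippet below shows one standard estimator of that kind, a non-negative PU risk estimate with an assumed-known class prior; the 0-1 loss and the clipping are generic choices for illustration, not necessarily the exact estimator the paper minimizes at each split.</p>
<pre><code>
import numpy as np

def zero_one_loss(pred, y):
    # pred and y take values in {+1, -1}
    return (pred != y).astype(float)

def nn_pu_risk(pred_pos, pred_unl, prior):
    """Non-negative PU estimate of the expected 0-1 risk.
    pred_pos: predictions on labeled positive data
    pred_unl: predictions on unlabeled data
    prior:    assumed-known class prior P(y = +1)
    """
    risk_pos = np.mean(zero_one_loss(pred_pos, +1))
    risk_pos_as_neg = np.mean(zero_one_loss(pred_pos, -1))
    risk_unl_as_neg = np.mean(zero_one_loss(pred_unl, -1))
    # Negative-class risk estimated from U and P, clipped at zero.
    risk_neg = max(0.0, risk_unl_as_neg - prior * risk_pos_as_neg)
    return prior * risk_pos + risk_neg

pred_pos = np.array([+1, +1, +1, -1])
pred_unl = np.array([+1, -1, -1, -1, +1, -1])
print(round(nn_pu_risk(pred_pos, pred_unl, prior=0.4), 3))  # 0.133
</code></pre>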
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.01795">arXiv:2210.01795</a> <span> [<a href="https://arxiv.org/pdf/2210.01795">pdf</a>, <a href="https://arxiv.org/format/2210.01795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/DAC18074.2021.9586115">10.1109/DAC18074.2021.9586115 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> BayesFT: Bayesian Optimization for Fault Tolerant Neural Network Architecture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+J">Jingbiao Mei</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zhicheng Fang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Ziqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Huaying Wu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaoyao Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.01795v1-abstract-short" style="display: inline;"> To deploy deep learning algorithms on resource-limited scenarios, an emerging device-resistive random access memory (ReRAM) has been regarded as promising via analog computing. However, the practicability of ReRAM is primarily limited due to the weight drifting of ReRAM neural networks due to multi-factor reasons, including manufacturing, thermal noises, and etc. In this paper, we propose a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01795v1-abstract-full').style.display = 'inline'; document.getElementById('2210.01795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.01795v1-abstract-full" style="display: none;"> To deploy deep learning algorithms on resource-limited scenarios, an emerging device-resistive random access memory (ReRAM) has been regarded as promising via analog computing. However, the practicability of ReRAM is primarily limited due to the weight drifting of ReRAM neural networks due to multi-factor reasons, including manufacturing, thermal noises, and etc. In this paper, we propose a novel Bayesian optimization method for fault tolerant neural network architecture (BayesFT). 
For the design of the neural architecture search space, instead of conducting the search over the whole feasible architecture space, we first systematically explore the weight drifting tolerance of different neural network components, such as dropout, normalization, the number of layers, and activation functions; among these, dropout is found to improve the robustness of neural networks to weight drifting. Based on our analysis, we propose an efficient search space that searches only over the dropout rate of each layer. Then, we use Bayesian optimization to search for the optimal neural architecture robust to weight drifting. Empirical experiments demonstrate that our algorithmic framework has outperformed the state-of-the-art methods by up to 10 times on various tasks, such as image classification and object detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01795v1-abstract-full').style.display = 'none'; document.getElementById('2210.01795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.05733">arXiv:2209.05733</a> <span> [<a href="https://arxiv.org/pdf/2209.05733">pdf</a>, <a href="https://arxiv.org/format/2209.05733">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Discretization using Voronoi Trees for Continuous-Action POMDPs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoerger%2C+M">Marcus Hoerger</a>, <a href="/search/cs?searchtype=author&query=Kurniawati%2C+H">Hanna Kurniawati</a>, <a href="/search/cs?searchtype=author&query=Kroese%2C+D">Dirk Kroese</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.05733v1-abstract-short" style="display: inline;"> Solving Partially Observable Markov Decision Processes (POMDPs) with continuous actions is challenging, particularly for high-dimensional action spaces. To alleviate this difficulty, we propose a new sampling-based online POMDP solver, called Adaptive Discretization using Voronoi Trees (ADVT). It uses Monte Carlo Tree Search in combination with an adaptive discretization of the action space as wel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.05733v1-abstract-full').style.display = 'inline'; document.getElementById('2209.05733v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.05733v1-abstract-full" style="display: none;"> Solving Partially Observable Markov Decision Processes (POMDPs) with continuous actions is challenging, particularly for high-dimensional action spaces.
To alleviate this difficulty, we propose a new sampling-based online POMDP solver, called Adaptive Discretization using Voronoi Trees (ADVT). It uses Monte Carlo Tree Search in combination with an adaptive discretization of the action space as well as optimistic optimization to efficiently sample high-dimensional continuous action spaces and compute the best action to perform. Specifically, we adaptively discretize the action space for each sampled belief using a hierarchical partition which we call a Voronoi tree. A Voronoi tree is a Binary Space Partitioning (BSP) that implicitly maintains the partition of a cell as the Voronoi diagram of two points sampled from the cell. This partitioning strategy keeps the cost of partitioning and estimating the size of each cell low, even in high-dimensional spaces where many sampled points are required to cover the space well. ADVT uses the estimated sizes of the cells to form an upper-confidence bound of the action values of the cell, and in turn uses the upper-confidence bound to guide the Monte Carlo Tree Search expansion and further discretization of the action space. This strategy enables ADVT to better exploit local information in the action space, leading to an action space discretization that is more adaptive, and hence more efficient in computing good POMDP solutions, compared to existing solvers. Experiments on simulations of four types of benchmark problems indicate that ADVT outperforms and scales substantially better to high-dimensional continuous action spaces, compared to state-of-the-art continuous action POMDP solvers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.05733v1-abstract-full').style.display = 'none'; document.getElementById('2209.05733v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in The 15th International Workshop on the Algorithmic Foundations of Robotics (WAFR 2022). 
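<p class="is-size-7">A toy sketch of the Voronoi-tree cell split described in the ADVT abstract above: two pivot actions are sampled from a cell and every action in the cell is assigned to the nearer pivot, i.e. the cell is split along the bisector of the two pivots. The diameter proxy and the array layout are illustrative assumptions, not the paper's exact bookkeeping.</p>
<pre><code>
import numpy as np

def split_voronoi_cell(actions, rng):
    """Split one cell: sample two pivots, assign each action to the
    nearer pivot, and return a cheap per-child diameter estimate."""
    i, j = rng.choice(len(actions), size=2, replace=False)
    pivots = actions[[i, j]]
    dists = np.linalg.norm(actions[:, None, :] - pivots[None, :, :], axis=-1)
    nearer = np.argmin(dists, axis=1)
    children = [actions[nearer == 0], actions[nearer == 1]]
    diameters = [2.0 * np.max(np.linalg.norm(c - pivots[k], axis=1))
                 for k, c in enumerate(children)]
    return pivots, children, diameters

rng = np.random.default_rng(0)
actions = rng.uniform(-1.0, 1.0, size=(50, 3))   # 50 sampled 3-D continuous actions
pivots, children, diameters = split_voronoi_cell(actions, rng)
print(len(children[0]), len(children[1]), [round(d, 2) for d in diameters])
</code></pre>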
To be published in the Springer Proceedings in Advanced Robotics (SPAR)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.05749">arXiv:2206.05749</a> <span> [<a href="https://arxiv.org/pdf/2206.05749">pdf</a>, <a href="https://arxiv.org/format/2206.05749">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Regularization Penalty Optimization for Addressing Data Quality Variance in OoD Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runpeng Yu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hong Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kaican Li</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shao-Lun Huang</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.05749v1-abstract-short" style="display: inline;"> Due to the poor generalization performance of traditional empirical risk minimization (ERM) in the case of distributional shift, Out-of-Distribution (OoD) generalization algorithms receive increasing attention. However, OoD generalization algorithms overlook the great variance in the quality of training data, which significantly compromises the accuracy of these methods. In this paper, we theoreti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05749v1-abstract-full').style.display = 'inline'; document.getElementById('2206.05749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.05749v1-abstract-full" style="display: none;"> Due to the poor generalization performance of traditional empirical risk minimization (ERM) in the case of distributional shift, Out-of-Distribution (OoD) generalization algorithms receive increasing attention. However, OoD generalization algorithms overlook the great variance in the quality of training data, which significantly compromises the accuracy of these methods. In this paper, we theoretically reveal the relationship between training data quality and algorithm performance and analyze the optimal regularization scheme for Lipschitz regularized invariant risk minimization. A novel algorithm is proposed based on the theoretical results to alleviate the influence of low-quality data at both the sample level and the domain level. The experiments on both the regression and classification benchmarks validate the effectiveness of our method with statistical significance. 
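<p class="is-size-7">The abstract above argues for adapting regularization strength to data quality at the sample and domain level. As a loose illustration only (not the paper's algorithm), the sketch below combines an IRMv1-style invariance penalty with per-domain penalty weights; the binary-classification setup and the particular weights are placeholder assumptions.</p>
<pre><code>
import torch
import torch.nn.functional as F

def irm_penalty(logits, y):
    """IRMv1-style penalty: squared gradient of the risk with respect
    to a dummy classifier scale fixed at 1.0."""
    scale = torch.ones(1, requires_grad=True)
    loss = F.binary_cross_entropy_with_logits(logits * scale, y)
    grad = torch.autograd.grad(loss, [scale], create_graph=True)[0]
    return (grad ** 2).sum()

def weighted_objective(domain_logits, domain_labels, domain_weights):
    """ERM term plus a per-domain-weighted invariance penalty; lower
    weights would be assigned to noisier, lower-quality domains."""
    total = torch.zeros(())
    for logits, y, w in zip(domain_logits, domain_labels, domain_weights):
        total = total + F.binary_cross_entropy_with_logits(logits, y)
        total = total + w * irm_penalty(logits, y)
    return total

# Toy usage with two "domains" of random logits and labels.
torch.manual_seed(0)
logits = [torch.randn(8, requires_grad=True) for _ in range(2)]
labels = [torch.randint(0, 2, (8,)).float() for _ in range(2)]
print(weighted_objective(logits, labels, domain_weights=[1.0, 0.1]).item())
</code></pre>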
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05749v1-abstract-full').style.display = 'none'; document.getElementById('2206.05749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09485">arXiv:2205.09485</a> <span> [<a href="https://arxiv.org/pdf/2205.09485">pdf</a>, <a href="https://arxiv.org/format/2205.09485">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Boosting Algorithm for Positive-Unlabeled Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yawen Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mingzhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chenhao Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weitong Chen</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Miao Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09485v4-abstract-short" style="display: inline;"> Positive-unlabeled (PU) learning deals with binary classification problems when only positive (P) and unlabeled (U) data are available. Many recent PU methods are based on neural networks, but little has been done to develop boosting algorithms for PU learning, despite boosting algorithms' strong performance on many fully supervised classification problems. In this paper, we propose a novel boosti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09485v4-abstract-full').style.display = 'inline'; document.getElementById('2205.09485v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09485v4-abstract-full" style="display: none;"> Positive-unlabeled (PU) learning deals with binary classification problems when only positive (P) and unlabeled (U) data are available. Many recent PU methods are based on neural networks, but little has been done to develop boosting algorithms for PU learning, despite boosting algorithms' strong performance on many fully supervised classification problems. In this paper, we propose a novel boosting algorithm, AdaPU, for PU learning. Similarly to AdaBoost, AdaPU aims to optimize an empirical exponential loss, but the loss is based on the PU data, rather than on positive-negative (PN) data. As in AdaBoost, we learn a weighted combination of weak classifiers by learning one weak classifier and its weight at a time. However, AdaPU requires a very different algorithm for learning the weak classifiers and determining their weights. 
This is because AdaPU learns a weak classifier and its weight using a weighted positive-negative (PN) dataset with some negative data weights $-$ the dataset is derived from the original PU data, and the data weights are determined by the current weighted classifier combination, but some data weights are negative. Our experiments showed that AdaPU outperforms neural networks on several benchmark PU datasets, including a large-scale challenging cyber security dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09485v4-abstract-full').style.display = 'none'; document.getElementById('2205.09485v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 24 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.03821">arXiv:2205.03821</a> <span> [<a href="https://arxiv.org/pdf/2205.03821">pdf</a>, <a href="https://arxiv.org/format/2205.03821">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Homography Estimation with Coplanarity-Aware GAN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hong%2C+M">Mingbo Hong</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yuhang Lu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nianjin Ye</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chunyu Lin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Q">Qijun Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.03821v1-abstract-short" style="display: inline;"> Estimating homography from an image pair is a fundamental problem in image alignment. Unsupervised learning methods have received increasing attention in this field due to their promising performance and label-free training. However, existing methods do not explicitly consider the problem of plane-induced parallax, which will make the predicted homography compromised on multiple planes. In this wo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03821v1-abstract-full').style.display = 'inline'; document.getElementById('2205.03821v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.03821v1-abstract-full" style="display: none;"> Estimating homography from an image pair is a fundamental problem in image alignment. Unsupervised learning methods have received increasing attention in this field due to their promising performance and label-free training. 
However, existing methods do not explicitly consider the problem of plane-induced parallax, which will make the predicted homography compromised on multiple planes. In this work, we propose a novel method HomoGAN to guide unsupervised homography estimation to focus on the dominant plane. First, a multi-scale transformer network is designed to predict homography from the feature pyramids of input images in a coarse-to-fine fashion. Moreover, we propose an unsupervised GAN to impose coplanarity constraint on the predicted homography, which is realized by using a generator to predict a mask of aligned regions, and then a discriminator to check if two masked feature maps are induced by a single homography. To validate the effectiveness of HomoGAN and its components, we conduct extensive experiments on a large-scale dataset, and the results show that our matching error is 22% lower than the previous SOTA method. Code is available at https://github.com/megvii-research/HomoGAN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03821v1-abstract-full').style.display = 'none'; document.getElementById('2205.03821v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.06298">arXiv:2203.06298</a> <span> [<a href="https://arxiv.org/pdf/2203.06298">pdf</a>, <a href="https://arxiv.org/format/2203.06298">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Neural Topic Modeling with Deep Mutual Information Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kang Xu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaoqiu Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuan-fang Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tongtong Wu</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+G">Guilin Qi</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zheng Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.06298v1-abstract-short" style="display: inline;"> The emerging neural topic models make topic modeling more easily adaptable and extendable in unsupervised text mining. However, the existing neural topic models is difficult to retain representative information of the documents within the learnt topic representation. 
In this paper, we propose a neural topic model which incorporates deep mutual information estimation, i.e., Neural Topic Modeling wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.06298v1-abstract-full').style.display = 'inline'; document.getElementById('2203.06298v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.06298v1-abstract-full" style="display: none;"> The emerging neural topic models make topic modeling more easily adaptable and extendable in unsupervised text mining. However, the existing neural topic models is difficult to retain representative information of the documents within the learnt topic representation. In this paper, we propose a neural topic model which incorporates deep mutual information estimation, i.e., Neural Topic Modeling with Deep Mutual Information Estimation(NTM-DMIE). NTM-DMIE is a neural network method for topic learning which maximizes the mutual information between the input documents and their latent topic representation. To learn robust topic representation, we incorporate the discriminator to discriminate negative examples and positive examples via adversarial learning. Moreover, we use both global and local mutual information to preserve the rich information of the input documents in the topic representation. We evaluate NTM-DMIE on several metrics, including accuracy of text clustering, with topic representation, topic uniqueness and topic coherence. Compared to the existing methods, the experimental results show that NTM-DMIE can outperform in all the metrics on the four datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.06298v1-abstract-full').style.display = 'none'; document.getElementById('2203.06298v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 page, 10 Figures and 7 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.11963">arXiv:2202.11963</a> <span> [<a href="https://arxiv.org/pdf/2202.11963">pdf</a>, <a href="https://arxiv.org/ps/2202.11963">ps</a>, <a href="https://arxiv.org/format/2202.11963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A general framework for adaptive two-index fusion attribute weighted naive Bayes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiaoliang Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Dongyang Wu</a>, <a href="/search/cs?searchtype=author&query=You%2C+Z">Zitong You</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.11963v1-abstract-short" style="display: inline;"> Naive Bayes(NB) is one of the essential algorithms in data mining. However, it is rarely used in reality because of the attribute independent assumption. Researchers have proposed many improved NB methods to alleviate this assumption. Among these methods, due to high efficiency and easy implementation, the filter attribute weighted NB methods receive great attentions. However, there still exists s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11963v1-abstract-full').style.display = 'inline'; document.getElementById('2202.11963v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.11963v1-abstract-full" style="display: none;"> Naive Bayes(NB) is one of the essential algorithms in data mining. However, it is rarely used in reality because of the attribute independent assumption. Researchers have proposed many improved NB methods to alleviate this assumption. Among these methods, due to high efficiency and easy implementation, the filter attribute weighted NB methods receive great attentions. However, there still exists several challenges, such as the poor representation ability for single index and the fusion problem of two indexes. To overcome above challenges, we propose a general framework for Adaptive Two-index Fusion attribute weighted NB(ATFNB). Two types of data description category are used to represent the correlation between classes and attributes, intercorrelation between attributes and attributes, respectively. ATFNB can select any one index from each category. Then, we introduce a switching factor \{beta} to fuse two indexes, which can adaptively adjust the optimal ratio of the two index on various datasets. And a quick algorithm is proposed to infer the optimal interval of switching factor \{beta}. Finally, the weight of each attribute is calculated using the optimal value \{beta} and is integrated into NB classifier to improve the accuracy. 
The experimental results on 50 benchmark datasets and a Flavia dataset show that ATFNB outperforms the basic NB and state-of-the-art filter weighted NB models. In addition, the ATFNB framework can improve the existing two-index NB model by introducing the adaptive switching factor β. Auxiliary experimental results demonstrate that the improved model significantly increases the accuracy compared to the original model without the adaptive switching factor β. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11963v1-abstract-full').style.display = 'none'; document.getElementById('2202.11963v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.02038">arXiv:2109.02038</a> <span> [<a href="https://arxiv.org/pdf/2109.02038">pdf</a>, <a href="https://arxiv.org/format/2109.02038">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NAS-OoD: Neural Architecture Search for Out-of-Distribution Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bai%2C+H">Haoyue Bai</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fengwei Zhou</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+S+-+G">S. -H. Gary Chan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenguo Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.02038v1-abstract-short" style="display: inline;"> Recent advances on Out-of-Distribution (OoD) generalization reveal the robustness of deep learning models against distribution shifts. However, existing works focus on OoD algorithms, such as invariant risk minimization, domain generalization, or stable learning, without considering the influence of deep model architectures on OoD generalization, which may lead to sub-optimal performance. Neural A… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.02038v1-abstract-full').style.display = 'inline'; document.getElementById('2109.02038v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.02038v1-abstract-full" style="display: none;"> Recent advances on Out-of-Distribution (OoD) generalization reveal the robustness of deep learning models against distribution shifts. However, existing works focus on OoD algorithms, such as invariant risk minimization, domain generalization, or stable learning, without considering the influence of deep model architectures on OoD generalization, which may lead to sub-optimal performance. Neural Architecture Search (NAS) methods search for architecture based on its performance on the training data, which may result in poor generalization for OoD tasks.
In this work, we propose robust Neural Architecture Search for OoD generalization (NAS-OoD), which optimizes the architecture with respect to its performance on generated OoD data by gradient descent. Specifically, a data generator is learned to synthesize OoD data by maximizing losses computed by different neural architectures, while the goal for architecture search is to find the optimal architecture parameters that minimize the synthetic OoD data losses. The data generator and the neural architecture are jointly optimized in an end-to-end manner, and the minimax training process effectively discovers robust architectures that generalize well for different distribution shifts. Extensive experimental results show that NAS-OoD achieves superior performance on various OoD generalization benchmarks with deep models having a much fewer number of parameters. In addition, on a real industry dataset, the proposed NAS-OoD method reduces the error rate by more than 70% compared with the state-of-the-art method, demonstrating the proposed method's practicality for real applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.02038v1-abstract-full').style.display = 'none'; document.getElementById('2109.02038v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICCV2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.06028">arXiv:2108.06028</a> <span> [<a href="https://arxiv.org/pdf/2108.06028">pdf</a>, <a href="https://arxiv.org/format/2108.06028">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DeepIC: Coding for Interference Channels via Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chahine%2C+K">Karl Chahine</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+H">Hyeji Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.06028v1-abstract-short" style="display: inline;"> The two-user interference channel is a model for multi one-to-one communications, where two transmitters wish to communicate with their corresponding receivers via a shared wireless medium. Two most common and simple coding schemes are time division (TD) and treating interference as noise (TIN). 
Interestingly, it is shown that there exists an asymptotic scheme, called Han-Kobayashi scheme, that pe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.06028v1-abstract-full').style.display = 'inline'; document.getElementById('2108.06028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.06028v1-abstract-full" style="display: none;"> The two-user interference channel is a model for multi one-to-one communications, where two transmitters wish to communicate with their corresponding receivers via a shared wireless medium. Two most common and simple coding schemes are time division (TD) and treating interference as noise (TIN). Interestingly, it is shown that there exists an asymptotic scheme, called Han-Kobayashi scheme, that performs better than TD and TIN. However, Han-Kobayashi scheme has impractically high complexity and is designed for asymptotic settings, which leads to a gap between information theory and practice. In this paper, we focus on designing practical codes for interference channels. As it is challenging to analytically design practical codes with feasible complexity, we apply deep learning to learn codes for interference channels. We demonstrate that DeepIC, a convolutional neural network-based code with an iterative decoder, outperforms TD and TIN by a significant margin for two-user additive white Gaussian noise channels with moderate amount of interference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.06028v1-abstract-full').style.display = 'none'; document.getElementById('2108.06028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.03721">arXiv:2106.03721</a> <span> [<a href="https://arxiv.org/pdf/2106.03721">pdf</a>, <a href="https://arxiv.org/format/2106.03721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OoD-Bench: Quantifying and Understanding Two Dimensions of Out-of-Distribution Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nanyang Ye</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kaican Li</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+H">Haoyue Bai</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runpeng Yu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fengwei Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenguo Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jun Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.03721v3-abstract-short" style="display: inline;"> Deep learning has achieved tremendous success with independent and identically distributed (i.i.d.) data. 
However, the performance of neural networks often degenerates drastically when encountering out-of-distribution (OoD) data, i.e., when training and test data are sampled from different distributions. While a plethora of algorithms have been proposed for OoD generalization, our understanding of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03721v3-abstract-full').style.display = 'inline'; document.getElementById('2106.03721v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.03721v3-abstract-full" style="display: none;"> Deep learning has achieved tremendous success with independent and identically distributed (i.i.d.) data. However, the performance of neural networks often degenerates drastically when encountering out-of-distribution (OoD) data, i.e., when training and test data are sampled from different distributions. While a plethora of algorithms have been proposed for OoD generalization, our understanding of the data used to train and evaluate these algorithms remains stagnant. In this work, we first identify and measure two distinct kinds of distribution shifts that are ubiquitous in various datasets. Next, through extensive experiments, we compare OoD generalization algorithms across two groups of benchmarks, each dominated by one of the distribution shifts, revealing their strengths on one shift as well as limitations on the other shift. Overall, we position existing datasets and algorithms from different research areas seemingly unconnected into the same coherent picture. It may serve as a foothold that can be resorted to by future OoD generalization research. Our code is available at https://github.com/ynysjtu/ood_bench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03721v3-abstract-full').style.display = 'none'; document.getElementById('2106.03721v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2022 (oral)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.03479">arXiv:2106.03479</a> <span> [<a href="https://arxiv.org/pdf/2106.03479">pdf</a>, <a href="https://arxiv.org/format/2106.03479">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FINet: Dual Branches Feature Interaction for Partial-to-Partial Point Cloud Registration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nianjin Ye</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanghui Liu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+B">Bing Zeng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.03479v3-abstract-short" style="display: inline;"> Data association is important in the point cloud registration. In this work, we propose to solve the partial-to-partial registration from a new perspective, by introducing multi-level feature interactions between the source and the reference clouds at the feature extraction stage, such that the registration can be realized without the attentions or explicit mask estimation for the overlapping dete… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03479v3-abstract-full').style.display = 'inline'; document.getElementById('2106.03479v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.03479v3-abstract-full" style="display: none;"> Data association is important in the point cloud registration. In this work, we propose to solve the partial-to-partial registration from a new perspective, by introducing multi-level feature interactions between the source and the reference clouds at the feature extraction stage, such that the registration can be realized without the attentions or explicit mask estimation for the overlapping detection as adopted previously. Specifically, we present FINet, a feature interaction-based structure with the capability to enable and strengthen the information associating between the inputs at multiple stages. To achieve this, we first split the features into two components, one for rotation and one for translation, based on the fact that they belong to different solution spaces, yielding a dual branches structure. Second, we insert several interaction modules at the feature extractor for the data association. Third, we propose a transformation sensitivity loss to obtain rotation-attentive and translation-attentive features. Experiments demonstrate that our method performs higher precision and robustness compared to the state-of-the-art traditional and learning-based methods. Code is available at https://github.com/megvii-research/FINet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03479v3-abstract-full').style.display = 'none'; document.getElementById('2106.03479v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.01777">arXiv:2106.01777</a> <span> [<a href="https://arxiv.org/pdf/2106.01777">pdf</a>, <a href="https://arxiv.org/format/2106.01777">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> LiMIIRL: Lightweight Multiple-Intent Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Snoswell%2C+A+J">Aaron J. Snoswell</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+S+P+N">Surya P. N. Singh</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.01777v1-abstract-short" style="display: inline;"> Multiple-Intent Inverse Reinforcement Learning (MI-IRL) seeks to find a reward function ensemble to rationalize demonstrations of different but unlabelled intents. Within the popular expectation maximization (EM) framework for learning probabilistic MI-IRL models, we present a warm-start strategy based on up-front clustering of the demonstrations in feature space. Our theoretical analysis shows th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01777v1-abstract-full').style.display = 'inline'; document.getElementById('2106.01777v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.01777v1-abstract-full" style="display: none;"> Multiple-Intent Inverse Reinforcement Learning (MI-IRL) seeks to find a reward function ensemble to rationalize demonstrations of different but unlabelled intents. Within the popular expectation maximization (EM) framework for learning probabilistic MI-IRL models, we present a warm-start strategy based on up-front clustering of the demonstrations in feature space. Our theoretical analysis shows that this warm-start solution produces a near-optimal reward ensemble, provided the behavior modes satisfy mild separation conditions. We also propose a MI-IRL performance metric that generalizes the popular Expected Value Difference measure to directly assesses learned rewards against the ground-truth reward ensemble. Our metric elegantly addresses the difficulty of pairing up learned and ground truth rewards via a min-cost flow formulation, and is efficiently computable. We also develop a MI-IRL benchmark problem that allows for more comprehensive algorithmic evaluations. 
On this problem, we find our MI-IRL warm-start strategy helps avoid poor quality local minima reward ensembles, resulting in a significant improvement in behavior clustering. Our extensive sensitivity analysis demonstrates that the quality of the learned reward ensembles is improved under various settings, including cases where our theoretical assumptions do not necessarily hold. Finally, we demonstrate the effectiveness of our methods by discovering distinct driving styles in a large real-world dataset of driver GPS trajectories. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01777v1-abstract-full').style.display = 'none'; document.getElementById('2106.01777v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review for NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.15346">arXiv:2103.15346</a> <span> [<a href="https://arxiv.org/pdf/2103.15346">pdf</a>, <a href="https://arxiv.org/format/2103.15346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Motion Basis Learning for Unsupervised Deep Homography Estimation with Subspace Projection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+N">Nianjin Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chuan Wang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Haoqiang Fan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.15346v2-abstract-short" style="display: inline;"> In this paper, we introduce a new framework for unsupervised deep homography estimation. Our contributions are 3 folds. First, unlike previous methods that regress 4 offsets for a homography, we propose a homography flow representation, which can be estimated by a weighted sum of 8 pre-defined homography flow bases. Second, considering a homography contains 8 Degree-of-Freedoms (DOFs) that is much… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15346v2-abstract-full').style.display = 'inline'; document.getElementById('2103.15346v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.15346v2-abstract-full" style="display: none;"> In this paper, we introduce a new framework for unsupervised deep homography estimation. Our contributions are 3 folds. First, unlike previous methods that regress 4 offsets for a homography, we propose a homography flow representation, which can be estimated by a weighted sum of 8 pre-defined homography flow bases. 
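<p class="is-size-7">The sentence above describes regressing 8 basis weights instead of a dense flow. Below is a minimal sketch of one standard way to build such homography flow bases, namely the first-order flow induced by perturbing each of the 8 free entries of a normalized 3x3 homography; the paper's pre-defined bases may be constructed or normalized differently.</p>
<pre><code>
import numpy as np

def homography_flow_bases(h, w):
    """Return 8 dense flow fields of shape (8, h, w, 2), one per
    homography degree of freedom, evaluated on the pixel grid."""
    ys, xs = np.mgrid[0:h, 0:w].astype(float)
    zeros, ones = np.zeros_like(xs), np.ones_like(xs)
    bases = [
        np.stack([xs, zeros], -1),           # d h11: u = x
        np.stack([ys, zeros], -1),           # d h12: u = y
        np.stack([ones, zeros], -1),         # d h13: u = 1
        np.stack([zeros, xs], -1),           # d h21: v = x
        np.stack([zeros, ys], -1),           # d h22: v = y
        np.stack([zeros, ones], -1),         # d h23: v = 1
        np.stack([-xs * xs, -xs * ys], -1),  # d h31: u = -x^2, v = -xy
        np.stack([-xs * ys, -ys * ys], -1),  # d h32: u = -xy,  v = -y^2
    ]
    return np.stack(bases)

def homography_flow(weights, bases):
    """Flow as a weighted sum of the bases (the 8 scalars a network
    would regress in place of a full flow field)."""
    return np.tensordot(weights, bases, axes=1)

bases = homography_flow_bases(4, 5)
flow = homography_flow(np.array([0.01, 0.0, 0.5, 0.0, 0.01, -0.3, 0.0, 0.0]), bases)
print(bases.shape, flow.shape)   # (8, 4, 5, 2) (4, 5, 2)
</code></pre>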
Second, considering a homography contains 8 Degree-of-Freedoms (DOFs) that is much less than the rank of the network features, we propose a Low Rank Representation (LRR) block that reduces the feature rank, so that features corresponding to the dominant motions are retained while others are rejected. Last, we propose a Feature Identity Loss (FIL) to enforce the learned image feature warp-equivariant, meaning that the result should be identical if the order of warp operation and feature extraction is swapped. With this constraint, the unsupervised optimization is achieved more effectively and more stable features are learned. Extensive experiments are conducted to demonstrate the effectiveness of all the newly proposed components, and results show that our approach outperforms the state-of-the-art on the homography benchmark datasets both qualitatively and quantitatively. Code is available at https://github.com/megvii-research/BasesHomo. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15346v2-abstract-full').style.display = 'none'; document.getElementById('2103.15346v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.11512">arXiv:2103.11512</a> <span> [<a href="https://arxiv.org/pdf/2103.11512">pdf</a>, <a href="https://arxiv.org/format/2103.11512">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Robust Multi-Modal Policies for Industrial Assembly via Reinforcement Learning and Demonstrations: A Large-Scale Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+J">Jianlan Luo</a>, <a href="/search/cs?searchtype=author&query=Sushkov%2C+O">Oleg Sushkov</a>, <a href="/search/cs?searchtype=author&query=Pevceviciute%2C+R">Rugile Pevceviciute</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+W">Wenzhao Lian</a>, <a href="/search/cs?searchtype=author&query=Su%2C+C">Chang Su</a>, <a href="/search/cs?searchtype=author&query=Vecerik%2C+M">Mel Vecerik</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a>, <a href="/search/cs?searchtype=author&query=Schaal%2C+S">Stefan Schaal</a>, <a href="/search/cs?searchtype=author&query=Scholz%2C+J">Jon Scholz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.11512v4-abstract-short" style="display: inline;"> Over the past several years there has been a considerable research investment into learning-based approaches to industrial assembly, but despite significant progress these techniques have yet to be adopted by industry. 
We argue that it is the prohibitively large design space for Deep Reinforcement Learning (DRL), rather than algorithmic limitations per se, that are truly responsible for this lack… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.11512v4-abstract-full').style.display = 'inline'; document.getElementById('2103.11512v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.11512v4-abstract-full" style="display: none;"> Over the past several years there has been a considerable research investment into learning-based approaches to industrial assembly, but despite significant progress these techniques have yet to be adopted by industry. We argue that it is the prohibitively large design space for Deep Reinforcement Learning (DRL), rather than algorithmic limitations per se, that are truly responsible for this lack of adoption. Pushing these techniques into the industrial mainstream requires an industry-oriented paradigm which differs significantly from the academic mindset. In this paper we define criteria for industry-oriented DRL, and perform a thorough comparison according to these criteria of one family of learning approaches, DRL from demonstration, against a professional industrial integrator on the recently established NIST assembly benchmark. We explain the design choices, representing several years of investigation, which enabled our DRL system to consistently outperform the integrator baseline in terms of both speed and reliability. Finally, we conclude with a competition between our DRL system and a human on a challenge task of insertion into a randomly moving target. This study suggests that DRL is capable of outperforming not only established engineered approaches, but the human motor system as well, and that there remains significant room for improvement. Videos can be found on our project website: https://sites.google.com/view/shield-nist. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.11512v4-abstract-full').style.display = 'none'; document.getElementById('2103.11512v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. 
arXiv:2012.09382 (https://arxiv.org/abs/2012.09382) [pdf, other] cs.LG (Machine Learning)
DecAug: Out-of-Distribution Generalization via Decomposed Feature Representation and Semantic Augmentation
Authors: Haoyue Bai, Rui Sun, Lanqing Hong, Fengwei Zhou, Nanyang Ye, Han-Jia Ye, S. -H. Gary Chan, Zhenguo Li
Abstract: While deep learning demonstrates its strong ability to handle independent and identically distributed (IID) data, it often suffers at out-of-distribution (OoD) generalization, where the test data come from a different distribution than the training data. Designing a general OoD generalization framework for a wide range of applications is challenging, mainly due to the possible correlation shift and diversity shift in the real world. Most previous approaches can only solve one specific distribution shift, such as shift across domains or the extrapolation of correlation. To address that, we propose DecAug, a novel decomposed feature representation and semantic augmentation approach for OoD generalization. DecAug disentangles category-related and context-related features. Category-related features contain causal information about the target object, while context-related features describe the attributes, styles, backgrounds, or scenes, causing distribution shifts between training and test data. The decomposition is achieved by orthogonalizing the two gradients (w.r.t. intermediate features) of the losses for predicting category and context labels. Furthermore, we perform gradient-based augmentation on context-related features to improve the robustness of the learned representations. Experimental results show that DecAug outperforms other state-of-the-art methods on various OoD datasets, and it is among the very few methods that can deal with different types of OoD generalization challenges.
Submitted 16 December, 2020; originally announced December 2020.
Comments: Accepted by AAAI2021
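The gradient-orthogonalization step described in the DecAug abstract can be pictured in a few lines. The snippet below is an illustrative reading of that single sentence rather than the authors' code: given the gradients of the category loss and the context loss with respect to the same intermediate feature, the context gradient is projected onto the orthogonal complement of the category gradient.

    import torch

    def orthogonalize(grad_context: torch.Tensor, grad_category: torch.Tensor,
                      eps: float = 1e-12) -> torch.Tensor:
        """Remove from `grad_context` its component along `grad_category`.

        Both tensors are gradients w.r.t. the same intermediate feature; the
        result is orthogonal to `grad_category` up to numerical error.
        """
        g_ctx = grad_context.flatten()
        g_cat = grad_category.flatten()
        coeff = torch.dot(g_ctx, g_cat) / (torch.dot(g_cat, g_cat) + eps)
        return (g_ctx - coeff * g_cat).view_as(grad_context)

    # Quick check: the projected gradient is (numerically) orthogonal.
    if __name__ == "__main__":
        g_ctx, g_cat = torch.randn(4, 16), torch.randn(4, 16)
        g_orth = orthogonalize(g_ctx, g_cat)
        print(torch.dot(g_orth.flatten(), g_cat.flatten()))  # ~0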
arXiv:2012.08112 (https://arxiv.org/abs/2012.08112) [pdf, other] cs.LG (Machine Learning)
Amata: An Annealing Mechanism for Adversarial Training Acceleration
Authors: Nanyang Ye, Qianxiao Li, Xiao-Yun Zhou, Zhanxing Zhu
Abstract: Despite their empirical success in various domains, deep neural networks have been revealed to be vulnerable to maliciously perturbed input data that can greatly degrade their performance; this is known as an adversarial attack. To counter adversarial attacks, adversarial training, formulated as a form of robust optimization, has been demonstrated to be effective. However, conducting adversarial training incurs substantial computational overhead compared with standard training. In order to reduce the computational cost, we propose an annealing mechanism, Amata, to reduce the overhead associated with adversarial training. The proposed Amata is provably convergent, well motivated from the lens of optimal control theory, and can be combined with existing acceleration methods to further enhance performance. It is demonstrated that on standard datasets, Amata can achieve similar or better robustness with around 1/3 to 1/2 the computational time of traditional methods. In addition, Amata can be incorporated into other adversarial training acceleration algorithms (e.g., YOPO, Free, Fast, and ATTA), which leads to a further reduction in computational time on large-scale problems.
Submitted 13 August, 2021; v1 submitted 15 December, 2020; originally announced December 2020.
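The abstract does not spell out what quantity Amata anneals. A common way to cut the cost of adversarial training, and one plausible shape for an annealing schedule, is to start with few PGD inner steps and increase them over training; the snippet below is purely an illustration of that idea, not the paper's algorithm.

    import math

    def annealed_pgd_steps(epoch: int, total_epochs: int,
                           min_steps: int = 1, max_steps: int = 10) -> int:
        """Linearly anneal the number of PGD inner steps over training:
        cheap attacks early, strong attacks late.  Illustrative schedule only.
        """
        frac = epoch / max(total_epochs - 1, 1)
        return min_steps + math.floor(frac * (max_steps - min_steps))

    if __name__ == "__main__":
        print([annealed_pgd_steps(e, 10) for e in range(10)])
        # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]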
arXiv:2012.02782 (https://arxiv.org/abs/2012.02782) [pdf, other] cs.LG (Machine Learning), cs.CV (Computer Vision and Pattern Recognition)
Batch Group Normalization
Authors: Xiao-Yun Zhou, Jiacheng Sun, Nanyang Ye, Xu Lan, Qijun Luo, Bo-Lin Lai, Pedro Esperanca, Guang-Zhong Yang, Zhenguo Li
Abstract: Deep Convolutional Neural Networks (DCNNs) are hard and time-consuming to train.
Normalization is one of the effective solutions. Among previous normalization methods, Batch Normalization (BN) performs well at medium and large batch sizes and generalizes well to multiple vision tasks, but its performance degrades significantly at small batch sizes. In this paper, we find that BN also saturates at extremely large batch sizes, i.e., 128 images per worker (GPU), and propose that the degradation/saturation of BN at small/extremely large batch sizes is caused by noisy/confused statistics calculation. Hence, without adding new trainable parameters, using multi-layer or multi-iteration information, or introducing extra computation, Batch Group Normalization (BGN) is proposed to solve the noisy/confused statistics calculation of BN at small/extremely large batch sizes by bringing in the channel, height, and width dimensions to compensate. The grouping technique of Group Normalization (GN) is used, and a hyper-parameter G controls the number of feature instances used for statistics calculation, so that the statistics are neither noisy nor confused across batch sizes. We empirically demonstrate that BGN consistently outperforms BN, Instance Normalization (IN), Layer Normalization (LN), GN, and Positional Normalization (PN) across a wide spectrum of vision tasks, including image classification, Neural Architecture Search (NAS), adversarial learning, Few-Shot Learning (FSL), and Unsupervised Domain Adaptation (UDA), indicating its good performance, robust stability to batch size, and wide generalizability. For example, when training ResNet-50 on ImageNet with a batch size of 2, BN achieves a Top-1 accuracy of 66.512% while BGN achieves 76.096%, a notable improvement.
Submitted 8 December, 2020; v1 submitted 4 December, 2020; originally announced December 2020.
Comments: 8 pages
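As described in the abstract, BGN pools statistics over more feature instances than BN does at a given batch size by also grouping channels in the style of GN. The function below is a rough sketch of one way to do that, assuming statistics are computed over (batch, channel group, height, width) and omitting affine parameters; the paper's exact formulation may differ.

    import torch

    def batch_group_norm(x: torch.Tensor, num_groups: int,
                         eps: float = 1e-5) -> torch.Tensor:
        """Normalize over (batch, channel-group, height, width).

        Like Group Normalization, channels are split into `num_groups` groups,
        but statistics are also pooled over the batch dimension so that more
        feature instances enter each mean/variance estimate.
        """
        n, c, h, w = x.shape
        assert c % num_groups == 0, "channels must be divisible by num_groups"
        x_g = x.view(n, num_groups, c // num_groups, h, w)
        mean = x_g.mean(dim=(0, 2, 3, 4), keepdim=True)
        var = x_g.var(dim=(0, 2, 3, 4), unbiased=False, keepdim=True)
        return ((x_g - mean) / torch.sqrt(var + eps)).view(n, c, h, w)

    if __name__ == "__main__":
        x = torch.randn(2, 8, 4, 4)
        print(batch_group_norm(x, num_groups=4).shape)  # torch.Size([2, 8, 4, 4])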
arXiv:2012.00889 (https://arxiv.org/abs/2012.00889) [pdf, other] cs.LG (Machine Learning), cs.RO (Robotics); doi: 10.1109/SSCI47803.2020.9308391
Revisiting Maximum Entropy Inverse Reinforcement Learning: New Perspectives and Algorithms
Authors: Aaron J. Snoswell, Surya P. N. Singh, Nan Ye
Abstract: We provide new perspectives and inference algorithms for Maximum Entropy (MaxEnt) Inverse Reinforcement Learning (IRL), which provides a principled method to find a most non-committal reward function consistent with given expert demonstrations, among many consistent reward functions. We first present a generalized MaxEnt formulation based on minimizing a KL-divergence instead of maximizing an entropy. This improves the previous heuristic derivation of the MaxEnt IRL model (for stochastic MDPs), allows a unified view of MaxEnt IRL and Relative Entropy IRL, and leads to a model-free learning algorithm for the MaxEnt IRL model. Second, a careful review of existing inference algorithms and implementations showed that they approximately compute the marginals required for learning the model. We provide examples to illustrate this, and present an efficient and exact inference algorithm.
Our algorithm can handle variable-length demonstrations; in addition, while a basic version takes time quadratic in the maximum demonstration length L, an improved version reduces this to linear using a padding trick. Experiments show that our exact algorithm improves reward learning compared with the approximate ones. Furthermore, our algorithm scales up to a large, real-world dataset involving driver behaviour forecasting. We provide an optimized implementation compatible with the OpenAI Gym interface. Our new insights and algorithms could lead to further interest in and exploration of the original MaxEnt IRL model.
Submitted 4 June, 2021; v1 submitted 1 December, 2020; originally announced December 2020.
Comments: Published as a conference paper at the 2020 IEEE Symposium Series on Computational Intelligence (SSCI)
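For readers who want the connection invoked by "minimizing a KL-divergence instead of maximizing an entropy" spelled out, the following is the standard textbook relationship, not the paper's exact model: over trajectory distributions $P$ that match the expert's feature expectations,

    $$
    \max_{P}\; H(P) \;\;\text{s.t.}\;\; \mathbb{E}_{\tau\sim P}[\phi(\tau)] = \hat{\mathbb{E}}_{D}[\phi(\tau)]
    \qquad\Longleftrightarrow\qquad
    \min_{P}\; \mathrm{KL}(P \,\|\, U) \;\;\text{s.t.}\;\; \mathbb{E}_{\tau\sim P}[\phi(\tau)] = \hat{\mathbb{E}}_{D}[\phi(\tau)],
    $$

since $\mathrm{KL}(P\,\|\,U) = \log|\mathcal{T}| - H(P)$ when $U$ is uniform over a finite trajectory set $\mathcal{T}$. Replacing $U$ with a non-uniform base distribution yields Relative Entropy IRL-style objectives, which is the unified view the abstract refers to.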
arXiv:2009.05423 (https://arxiv.org/abs/2009.05423) [pdf, other] cs.LG (Machine Learning), stat.ML (Machine Learning); doi: 10.1007/s10994-021-06049-9
Achieving Adversarial Robustness via Sparsity
Authors: Shufan Wang, Ningyi Liao, Liyao Xiang, Nanyang Ye, Quanshi Zhang
Abstract: Network pruning has been known to produce compact models without much accuracy degradation. However, how the pruning process affects a network's robustness, and the working mechanism behind it, remain unresolved. In this work, we theoretically prove that the sparsity of network weights is closely associated with model robustness. Through experiments on a variety of adversarial pruning methods, we find that weight sparsity does not hurt but rather improves robustness, and that both weight inheritance from the lottery ticket and adversarial training improve model robustness in network pruning. Based on these findings, we propose a novel adversarial training method called inverse weights inheritance, which imposes a sparse weight distribution on a large network by inheriting weights from a small network, thereby improving the robustness of the large network.
Submitted 11 September, 2020; originally announced September 2020.

arXiv:2006.16637 (https://arxiv.org/abs/2006.16637) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
OccInpFlow: Occlusion-Inpainting Optical Flow Estimation by Unsupervised Learning
Authors: Kunming Luo, Chuan Wang, Nianjin Ye, Shuaicheng Liu, Jue Wang
Abstract: Occlusion is an inevitable and critical problem in unsupervised optical flow learning. Existing methods either treat occlusions the same as non-occluded regions or simply remove them to avoid incorrectness. However, occlusion regions can provide effective information for optical flow learning. In this paper, we present OccInpFlow, an occlusion-inpainting framework that makes full use of occlusion regions. Specifically, a new appearance-flow network is proposed to inpaint occluded flows based on the image content. Moreover, a boundary warp is proposed to deal with occlusions caused by displacement beyond the image border. We conduct experiments on multiple leading flow benchmark datasets, such as Flying Chairs, KITTI and MPI-Sintel, which demonstrate that performance is significantly improved by our proposed occlusion handling framework.
Submitted 30 June, 2020; originally announced June 2020.

arXiv:2002.09884 (https://arxiv.org/abs/2002.09884) [pdf, other] cs.LG (Machine Learning), cs.AI (Artificial Intelligence), stat.ML (Machine Learning)
Discriminative Particle Filter Reinforcement Learning for Complex Partial Observations
Authors: Xiao Ma, Peter Karkus, David Hsu, Wee Sun Lee, Nan Ye
Abstract: Deep reinforcement learning is successful in decision making for sophisticated games, such as Atari, Go, etc. However, real-world decision making often requires reasoning with partial information extracted from complex visual observations. This paper presents Discriminative Particle Filter Reinforcement Learning (DPFRL), a new reinforcement learning framework for complex partial observations. DPFRL encodes a differentiable particle filter in the neural network policy for explicit reasoning with partial observations over time. The particle filter maintains a belief using a learned discriminative update, which is trained end-to-end for decision making. We show that using the discriminative update instead of standard generative models results in significantly improved performance, especially for tasks with complex visual observations, because it circumvents the difficulty of modeling complex observations that are irrelevant to decision making. In addition, to extract features from the particle belief, we propose a new type of belief feature based on the moment generating function. DPFRL outperforms state-of-the-art POMDP RL models in Flickering Atari Games, an existing POMDP RL benchmark, and in Natural Flickering Atari Games, a new, more challenging POMDP RL benchmark introduced in this paper. Further, DPFRL performs well for visual navigation with real-world data in the Habitat environment.
Submitted 23 February, 2020; originally announced February 2020.
Comments: Accepted to ICLR 2020
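The moment-generating-function belief feature mentioned in the DPFRL abstract has a simple generic form. The sketch below is an illustrative guess at its shape rather than the paper's exact definition: given weighted particles (h_i, w_i), each feature is the weighted MGF estimate sum_i w_i * exp(v . h_i) at a learned evaluation vector v.

    import torch

    def mgf_belief_features(particles: torch.Tensor, weights: torch.Tensor,
                            eval_points: torch.Tensor) -> torch.Tensor:
        """Moment-generating-function features of a particle belief.

        particles:   (K, D) particle states h_i
        weights:     (K,)   normalized particle weights w_i
        eval_points: (M, D) learned evaluation vectors v_m
        returns:     (M,)   features  sum_i w_i * exp(v_m . h_i)
        """
        scores = eval_points @ particles.t()          # (M, K) inner products
        return (weights * torch.exp(scores)).sum(dim=-1)

    if __name__ == "__main__":
        K, D, M = 32, 8, 4
        particles = torch.randn(K, D)
        weights = torch.softmax(torch.randn(K), dim=0)
        v = 0.1 * torch.randn(M, D)   # small v keeps the exponentials tame
        print(mgf_belief_features(particles, weights, v))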
arXiv:1912.05131 (https://arxiv.org/abs/1912.05131) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
DeepMeshFlow: Content Adaptive Mesh Deformation for Robust Image Registration
Authors: Nianjin Ye, Chuan Wang, Shuaicheng Liu, Lanpeng Jia, Jue Wang, Yongqing Cui
Abstract: Image alignment by mesh warps, such as meshflow, is a fundamental task which has been widely applied in various vision applications (e.g., multi-frame HDR/denoising, video stabilization). Traditional mesh warp methods detect and match image features, so the quality of alignment highly depends on the quality of the image features; however, image features are not robust in low-texture and low-light scenes. Deep homography methods, on the other hand, are free from this problem by learning deep features for robust performance, but a homography is limited to plane motions. In this work, we present a deep meshflow motion model, which takes two images as input and outputs a sparse motion field with motions located at mesh vertices. Deep meshflow enjoys the merit of meshflow, which can describe nonlinear motions, while also sharing the advantage of deep homography, which is robust against challenging textureless scenarios. In particular, a new unsupervised network structure is presented with content-adaptive capability. On one hand, image content that cannot be aligned under the mesh representation is rejected by our learned mask, similar to the RANSAC procedure. On the other hand, we learn multiple mesh resolutions, combined into a non-uniform mesh division.
Moreover, a comprehensive dataset is presented, covering various scenes for training and testing. Comparisons with both traditional mesh warp methods and deep-learning-based methods show the effectiveness of our deep meshflow motion model.
Submitted 11 December, 2019; originally announced December 2019.
Comments: 9 pages, 8 figures. arXiv admin note: text overlap with arXiv:1909.05983

arXiv:1910.08731 (https://arxiv.org/abs/1910.08731) [pdf] cs.NI (Networking and Internet Architecture); doi: 10.1109/JSEN.2019.2945595
A Type of Virtual Force based Energy-hole Mitigation Strategy for Sensor Networks
Authors: Chao Sha, Chunhui Ren, Reza Malekian, Min Wu, Haiping Huang, Ning Ye
Abstract: In the era of Big Data and Mobile Internet, how to ensure that terminal devices (e.g., sensor nodes) work steadily for a long time is one of the key issues in improving the efficiency of the whole network.
However, experience has shown that unattended equipment is prone to failure due to energy exhaustion, physical damage and other reasons. This may result in the emergence of energy holes, seriously affecting network performance and shortening the network lifetime. To reduce data redundancy and avoid the generation of sensing blind areas, a Virtual Force based Energy-hole Mitigation strategy (VFEM) is proposed in this paper. First, virtual forces (gravitation and repulsion) between nodes are introduced so that nodes distribute themselves as uniformly as possible. Second, in order to alleviate the energy-hole problem, the network is divided into several annuli of equal width. Then, another type of virtual force, named "virtual gravity generated by an annulus", is proposed to further optimize the positions of nodes in each annulus. Finally, with the help of the "data forwarding area", optimal paths for data uploading are selected, which effectively balances the energy consumption of nodes. Experimental results show that VFEM performs relatively well at postponing the emergence of energy holes and prolonging the network lifetime compared with other typical energy-hole mitigation methods.
Submitted 19 October, 2019; originally announced October 2019.
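The node-to-node virtual force sketched in the VFEM abstract follows the usual pattern of virtual-force deployment schemes: neighbors that are too far away attract, neighbors that are too close repel. The snippet below is a generic illustration of that pattern with made-up constants and thresholds, not the specific force law used by VFEM.

    import math

    def virtual_force(node, neighbors, d_th=10.0, k_att=1.0, k_rep=50.0):
        """Net 2-D virtual force on `node` exerted by `neighbors`.

        Neighbors farther than the threshold distance d_th pull the node
        toward them; closer ones push it away.  Constants are illustrative.
        """
        fx = fy = 0.0
        for nx, ny in neighbors:
            dx, dy = nx - node[0], ny - node[1]
            d = math.hypot(dx, dy)
            if d == 0.0:
                continue
            if d > d_th:                  # attraction toward a distant neighbor
                mag = k_att * (d - d_th)
            else:                         # repulsion from a crowded neighbor
                mag = -k_rep / (d * d)
            fx += mag * dx / d
            fy += mag * dy / d
        return fx, fy

    if __name__ == "__main__":
        print(virtual_force((0.0, 0.0), [(3.0, 4.0), (20.0, 0.0)]))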
arXiv:1910.03742 (https://arxiv.org/abs/1910.03742) [pdf, other] cs.LG (Machine Learning), stat.ML (Machine Learning)
Greedy Convex Ensemble
Authors: Tan Nguyen, Nan Ye, Peter L. Bartlett
Abstract: We consider learning a convex combination of basis models, and present some new theoretical and empirical results that demonstrate the effectiveness of a greedy approach. Theoretically, we first consider whether we can use linear, instead of convex, combinations, and obtain generalization results similar to existing ones for learning from a convex hull. We obtain a negative result that even the linear hull of very simple basis functions can have unbounded capacity, and is thus prone to overfitting; on the other hand, convex hulls are still rich but have bounded capacities. Secondly, we obtain a generalization bound for a general class of Lipschitz loss functions. Empirically, we first discuss how a convex combination can be greedily learned with early stopping, and how a convex combination can be non-greedily learned when the number of basis models is known a priori. Our experiments suggest that the greedy scheme is competitive with or better than several baselines, including boosting and random forests. The greedy algorithm requires little effort in hyper-parameter tuning, and also seems able to adapt to the underlying complexity of the problem. Our code is available at https://github.com/tan1889/gce.
Submitted 3 May, 2020; v1 submitted 8 October, 2019; originally announced October 2019.
Comments: Replace the previous version with the camera ready version accepted for IJCAI 2020
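One generic way to learn a convex combination greedily, in the spirit of the abstract (a stagewise sketch under the usual Frank-Wolfe-style step size, not necessarily the paper's exact procedure): at step t, mix in the basis model that most reduces the loss with weight 2/(t+2), which keeps the ensemble inside the convex hull of the basis predictions.

    import numpy as np

    def greedy_convex_ensemble(basis_preds, y, n_steps=50):
        """Greedily build a convex combination of basis model predictions.

        basis_preds: (B, N) array; row b holds basis model b's predictions
        y:           (N,)   regression targets (squared loss, for illustration)
        Returns convex weights over the B basis models (non-negative, sum to 1).
        """
        B, N = basis_preds.shape
        weights = np.zeros(B)
        ensemble = np.zeros(N)
        for t in range(n_steps):
            alpha = 2.0 / (t + 2.0)
            # candidate ensembles from mixing in each basis model
            cand = (1 - alpha) * ensemble[None, :] + alpha * basis_preds
            losses = ((cand - y[None, :]) ** 2).mean(axis=1)
            b = int(np.argmin(losses))
            ensemble = cand[b]
            weights = (1 - alpha) * weights
            weights[b] += alpha
        return weights

    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        y = rng.normal(size=100)
        basis = np.stack([y + 0.5 * rng.normal(size=100) for _ in range(5)])
        w = greedy_convex_ensemble(basis, y)
        print(w, w.sum())  # non-negative weights summing to 1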
arXiv:1909.05983 (https://arxiv.org/abs/1909.05983) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
Content-Aware Unsupervised Deep Homography Estimation
Authors: Jirong Zhang, Chuan Wang, Shuaicheng Liu, Lanpeng Jia, Nianjin Ye, Jue Wang, Ji Zhou, Jian Sun
Abstract: Homography estimation is a basic image alignment method in many applications. It is usually conducted by extracting and matching sparse feature points, which are error-prone in low-light and low-texture images. On the other hand, previous deep homography approaches use either synthetic images for supervised learning or aerial images for unsupervised learning, both ignoring the importance of handling depth disparities and moving objects in real-world applications. To overcome these problems, in this work we propose an unsupervised deep homography method with a new architecture design. In the spirit of the RANSAC procedure in traditional methods, we specifically learn an outlier mask to select only reliable regions for homography estimation. We calculate the loss with respect to our learned deep features instead of directly comparing image content, as was done previously. To achieve unsupervised training, we also formulate a novel triplet loss customized for our network. We verify our method by conducting comprehensive comparisons on a new dataset that covers a wide range of scenes with varying degrees of difficulty for the task.
Experimental results reveal that our method outperforms the state of the art, including both deep and feature-based solutions.
Submitted 20 July, 2020; v1 submitted 12 September, 2019; originally announced September 2019.
Comments: Accepted by ECCV 2020 (Oral, Top 2%, 3 over 3 Strong Accepts). Jirong Zhang and Chuan Wang are joint first authors, and Shuaicheng Liu is the corresponding author
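One way to picture the masked, feature-space comparison this abstract describes (a hedged sketch only; the paper's actual triplet loss and mask network are more involved than this): compare deep features of the warped source image and the target image, weighted by a predicted inlier mask so that unreliable regions contribute little.

    import torch

    def masked_feature_loss(feat_warped_src: torch.Tensor,
                            feat_target: torch.Tensor,
                            inlier_mask: torch.Tensor,
                            eps: float = 1e-6) -> torch.Tensor:
        """Mask-weighted L1 distance between two feature maps.

        feat_warped_src, feat_target: (N, C, H, W) deep features
        inlier_mask:                  (N, 1, H, W) weights in [0, 1] from a
                                      hypothetical mask head
        Illustrative stand-in for a content-aware, feature-space loss.
        """
        diff = (feat_warped_src - feat_target).abs()
        return (inlier_mask * diff).sum() / (inlier_mask.sum() * diff.shape[1] + eps)

    if __name__ == "__main__":
        f1, f2 = torch.rand(1, 16, 32, 32), torch.rand(1, 16, 32, 32)
        mask = torch.rand(1, 1, 32, 32)
        print(masked_feature_loss(f1, f2, mask))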