Search | arXiv e-print repository
Showing 1–28 of 28 results for author: Lian, W
Search v0.5.6 released 2020-02-24

Searching in archive cs. Sorted by announcement date (newest first); 50 results per page; abstracts shown.
1. arXiv:2411.01424 [cs.SI (Social and Information Networks); cs.DB (Databases)]
Effective Community Detection Over Streaming Bipartite Networks (Technical Report)
Authors: Nan Zhang, Yutong Ye, Yuyang Wang Xiang Lian, Mingsong Chen
Abstract: The streaming bipartite graph is extensively used to model the dynamic relationship between two types of entities in many real-world applications, such as movie recommendations, location-based services, and online shopping. Since it contains abundant information, discovering dense subgraphs with high structural cohesiveness (i.e., community detection) in the streaming bipartite graph is becoming a valuable problem. Inspired by this, in this paper, we study the community structure based on the butterfly motif in the bipartite graph. We propose a novel problem, named Community Detection over Streaming Bipartite Network (CD-SBN), which aims to retrieve qualified communities with user-specific query keywords and high structural cohesiveness in both snapshot and continuous scenarios. In particular, we formulate the user relationship score in the weighted bipartite network via the butterfly pattern and define a novel $(k,r,\sigma)$-bitruss as the community structure. To efficiently tackle the CD-SBN problem, we design effective pruning strategies to rule out false alarms of $(k,r,\sigma)$-bitruss and propose a hierarchical synopsis to facilitate the CD-SBN processing. Due to the dynamics of streaming bipartite networks, we devise an efficient procedure for incremental graph maintenance. We develop an efficient algorithm to answer the snapshot and continuous CD-SBN query by traversing the synopsis and applying the pruning strategies. With extensive experiments, we demonstrate the efficiency and effectiveness of our proposed CD-SBN processing approach over real/synthetic streaming bipartite networks.
Submitted 2 November, 2024; originally announced November 2024.
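A butterfly, the motif the CD-SBN abstract above builds on, is a (2,2)-biclique: two vertices on one side of the bipartite graph that share two common neighbors on the other side. The sketch below is only a generic illustration of butterfly counting on a toy unweighted edge list; the edge list, vertex names, and pairwise counting scheme are assumptions for illustration, not the paper's pruning or synopsis machinery.

    from collections import defaultdict
    from itertools import combinations

    def count_butterflies(edges):
        """Count butterflies ((2,2)-bicliques) in a bipartite graph.

        edges: iterable of (upper_vertex, lower_vertex) pairs. Every pair of
        upper vertices sharing c lower neighbors contributes C(c, 2) butterflies.
        """
        neighbors = defaultdict(set)              # upper vertex -> lower neighbors
        for u, v in edges:
            neighbors[u].add(v)
        total = 0
        for u1, u2 in combinations(sorted(neighbors), 2):
            c = len(neighbors[u1] & neighbors[u2])
            total += c * (c - 1) // 2             # choose 2 shared neighbors
        return total

    # Toy example: users u1..u3 interacting with items i1..i3.
    edges = [("u1", "i1"), ("u1", "i2"), ("u2", "i1"), ("u2", "i2"),
             ("u3", "i2"), ("u3", "i3")]
    print(count_butterflies(edges))               # 1 (u1 and u2 share i1 and i2)

In the paper's weighted, streaming setting such counts would additionally feed a relationship score and be maintained incrementally, which this sketch does not attempt.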
2. arXiv:2409.19528 [cs.RO (Robotics)]
FoAM: Foresight-Augmented Multi-Task Imitation Policy for Robotic Manipulation
Authors: Litao Liu, Wentao Wang, Yifan Han, Zhuoli Xie, Pengfei Yi, Junyan Li, Yi Qin, Wenzhao Lian
Abstract: Multi-task imitation learning (MTIL) has shown significant potential in robotic manipulation by enabling agents to perform various tasks using a unified policy. This simplifies the policy deployment and enhances the agent's adaptability across different contexts. However, key challenges remain, such as maintaining action reliability (e.g., avoiding abnormal action sequences that deviate from nominal task trajectories), distinguishing between similar tasks, and generalizing to unseen scenarios. To address these challenges, we introduce the Foresight-Augmented Manipulation Policy (FoAM), an innovative MTIL framework. FoAM not only learns to mimic expert actions but also predicts the visual outcomes of those actions to enhance decision-making. Additionally, it integrates multi-modal goal inputs, such as visual and language prompts, overcoming the limitations of single-conditioned policies. We evaluated FoAM across over 100 tasks in both simulation and real-world settings, demonstrating that it significantly improves IL policy performance, outperforming current state-of-the-art IL baselines by up to 41% in success rate. Furthermore, we released a simulation benchmark for robotic manipulation, featuring 10 task suites and over 80 challenging tasks designed for multi-task policy training and evaluation. See the project homepage https://projFoAM.github.io/ for details.
Submitted 28 September, 2024; originally announced September 2024.
Comments: 8 pages, 4 figures

3. arXiv:2407.01869 [eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)]
Let it shine: Autofluorescence of Papanicolaou-stain improves AI-based cytological oral cancer detection
Authors: Wenyi Lian, Joakim Lindblad, Christina Runow Stark, Jan-Michaël Hirsch, Nataša Sladoje
Abstract: Oral cancer is a global health challenge. It is treatable if detected early, but it is often fatal in late stages. There is a shift from the invasive and time-consuming tissue sampling and histological examination, toward non-invasive brush biopsies and cytological examination. Reliable computer-assisted methods are essential for cost-effective and accurate cytological analysis, but the lack of detailed cell-level annotations impairs model effectiveness. This study aims to improve AI-based oral cancer detection using multimodal imaging and deep fusion. We combine brightfield and fluorescence whole slide microscopy imaging to analyze Papanicolaou-stained liquid-based cytology slides of brush biopsies collected from both healthy and cancer patients. Due to limited cytological annotations, we utilize a weakly supervised deep learning approach using only patient-level labels. We evaluate various multimodal fusion strategies, including early, late, and three recent intermediate fusion methods. Our results show: (i) fluorescence imaging of Papanicolaou-stained samples provides substantial diagnostic information; (ii) multimodal fusion enhances classification and cancer detection accuracy over single-modality methods. Intermediate fusion is the leading method among the studied approaches. Specifically, the Co-Attention Fusion Network (CAFNet) model excels with an F1 score of 83.34% and accuracy of 91.79%, surpassing human performance on the task. Additional tests highlight the need for precise image registration to optimize multimodal analysis benefits. This study advances cytopathology by combining deep learning and multimodal imaging to enhance early, non-invasive detection of oral cancer, improving diagnostic accuracy and streamlining clinical workflows. The developed pipeline is also applicable in other cytological settings. Our codes and dataset are available online for further research.
Submitted 27 October, 2024; v1 submitted 1 July, 2024; originally announced July 2024.
Comments: 16 pages, 12 figures, 11 tables
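As background for the fusion strategies compared in the abstract above: in late fusion each modality is classified by its own network and the predictions are combined afterwards, whereas intermediate fusion (such as CAFNet) mixes features inside the network. The snippet below is a minimal late-fusion sketch in PyTorch with assumed toy encoders and input shapes; it is not the paper's CAFNet or training setup.

    import torch
    import torch.nn as nn

    class SmallEncoder(nn.Module):
        """Tiny CNN producing class logits for one imaging modality."""
        def __init__(self, in_channels=3, num_classes=2):
            super().__init__()
            self.features = nn.Sequential(
                nn.Conv2d(in_channels, 16, 3, padding=1), nn.ReLU(),
                nn.AdaptiveAvgPool2d(1),
            )
            self.classifier = nn.Linear(16, num_classes)

        def forward(self, x):
            return self.classifier(self.features(x).flatten(1))

    class LateFusion(nn.Module):
        """Average the per-modality class probabilities (late fusion)."""
        def __init__(self):
            super().__init__()
            self.brightfield = SmallEncoder()
            self.fluorescence = SmallEncoder()

        def forward(self, bf, fl):
            p_bf = self.brightfield(bf).softmax(dim=1)
            p_fl = self.fluorescence(fl).softmax(dim=1)
            return (p_bf + p_fl) / 2

    model = LateFusion()
    bf = torch.randn(4, 3, 64, 64)   # brightfield patches
    fl = torch.randn(4, 3, 64, 64)   # fluorescence patches
    print(model(bf, fl).shape)       # torch.Size([4, 2])

Intermediate fusion replaces the probability averaging with feature-level interaction (for example co-attention) before a shared classifier.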
4. arXiv:2405.08589 [cs.CV (Computer Vision and Pattern Recognition)]
Variable Substitution and Bilinear Programming for Aligning Partially Overlapping Point Sets
Authors: Wei Lian, Zhesen Cui, Fei Ma, Hang Pan, Wangmeng Zuo
Abstract: In many applications, the demand arises for algorithms capable of aligning partially overlapping point sets while remaining invariant to the corresponding transformations. This research presents a method designed to meet such requirements through minimization of the objective function of the robust point matching (RPM) algorithm. First, we show that the RPM objective is a cubic polynomial. Then, through variable substitution, we transform the RPM objective to a quadratic function. Leveraging the convex envelope of bilinear monomials, we proceed to relax the resulting objective function, thus obtaining a lower bound problem that can be conveniently decomposed into distinct linear assignment and low-dimensional convex quadratic program components, both amenable to efficient optimization. Furthermore, a branch-and-bound (BnB) algorithm is devised, which solely branches over the transformation parameters, thereby boosting the convergence rate. Empirical evaluations demonstrate better robustness of the proposed methodology against non-rigid deformation, positional noise, and outliers, particularly in scenarios where outliers remain distinct from inliers, when compared with prevailing state-of-the-art approaches.
Submitted 14 May, 2024; originally announced May 2024.
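The "convex envelope of bilinear monomials" mentioned in this abstract is commonly realized by the McCormick envelope: on a box, the product x*y is sandwiched between two pairs of linear functions. The sketch below only evaluates these textbook bounds for a single term with assumed box bounds; the paper's full relaxation, which embeds this idea in a branch-and-bound scheme over transformation parameters, is not reproduced here.

    def mccormick_envelope(x, y, xl, xu, yl, yu):
        """Convex/concave (McCormick) envelope of the bilinear term w = x*y
        on the box [xl, xu] x [yl, yu]. Returns (lower, upper) bounds on x*y."""
        lower = max(xl * y + yl * x - xl * yl,
                    xu * y + yu * x - xu * yu)
        upper = min(xu * y + yl * x - xu * yl,
                    xl * y + yu * x - xl * yu)
        return lower, upper

    # The true product always lies between the two envelopes on the box.
    lo, up = mccormick_envelope(0.3, -0.4, 0.0, 1.0, -1.0, 1.0)
    print(lo, 0.3 * -0.4, up)   # -0.3 <= -0.12 <= 0.3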
5. arXiv:2404.14248 [cs.CV (Computer Vision and Pattern Recognition)]
NTIRE 2024 Challenge on Low Light Image Enhancement: Methods and Results
Authors: Xiaoning Liu, Zongwei Wu, Ao Li, Florin-Alexandru Vasluianu, Yulun Zhang, Shuhang Gu, Le Zhang, Ce Zhu, Radu Timofte, Zhi Jin, Hongjun Wu, Chenxi Wang, Haitao Ling, Yuanhao Cai, Hao Bian, Yuxin Zheng, Jing Lin, Alan Yuille, Ben Shao, Jin Guo, Tianli Liu, Mohao Wu, Yixu Feng, Shuo Hou, Haotian Lin, et al. (87 additional authors not shown)
Abstract: This paper reviews the NTIRE 2024 low light image enhancement challenge, highlighting the proposed solutions and results. The aim of this challenge is to discover an effective network design or solution capable of generating brighter, clearer, and visually appealing results when dealing with a variety of conditions, including ultra-high resolution (4K and beyond), non-uniform illumination, backlighting, extreme darkness, and night scenes. A notable total of 428 participants registered for the challenge, with 22 teams ultimately making valid submissions. This paper meticulously evaluates the state-of-the-art advancements in enhancing low-light images, reflecting the significant progress and creativity in this field.
Submitted 22 April, 2024; originally announced April 2024.
Comments: NTIRE 2024 Challenge Report

6. arXiv:2404.09735 [cs.CV (Computer Vision and Pattern Recognition)]
Equipping Diffusion Models with Differentiable Spatial Entropy for Low-Light Image Enhancement
Authors: Wenyi Lian, Wenjing Lian, Ziwei Luo
Abstract: Image restoration, which aims to recover high-quality images from their corrupted counterparts, often faces the challenge of being an ill-posed problem that allows multiple solutions for a single input. However, most deep learning based works simply employ l1 loss to train their network in a deterministic way, resulting in over-smoothed predictions with inferior perceptual quality. In this work, we propose a novel method that shifts the focus from a deterministic pixel-by-pixel comparison to a statistical perspective, emphasizing the learning of distributions rather than individual pixel values. The core idea is to introduce spatial entropy into the loss function to measure the distribution difference between predictions and targets. To make this spatial entropy differentiable, we employ kernel density estimation (KDE) to approximate the probabilities for specific intensity values of each pixel with their neighbor areas. Specifically, we equip the entropy with diffusion models and aim for superior accuracy and enhanced perceptual quality over l1 based noise matching loss. In the experiments, we evaluate the proposed method for low light enhancement on two datasets and the NTIRE challenge 2024. All these results illustrate the effectiveness of our statistic-based entropy loss. Code is available at https://github.com/shermanlian/spatial-entropy-loss.
Submitted 15 April, 2024; originally announced April 2024.
Comments: CVPRW 2024, best LPIPS in the NTIRE low light enhancement challenge 2024
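To make the KDE idea in the abstract above concrete, here is a stripped-down differentiable soft-histogram entropy in PyTorch. It computes a global (not per-neighborhood, i.e. not truly spatial) intensity histogram and penalizes the entropy gap between prediction and target; this simplification, the bin count, and the bandwidth are assumptions for illustration, and the authors' exact spatial formulation and its coupling with diffusion models live in their linked repository.

    import torch

    def soft_histogram(x, num_bins=32, bandwidth=0.05):
        """KDE-style soft histogram over intensities in [0, 1].
        x: tensor of shape (B, C, H, W). Returns per-image probabilities (B, num_bins)."""
        centers = torch.linspace(0.0, 1.0, num_bins, device=x.device)
        diff = x.flatten(1).unsqueeze(-1) - centers           # (B, N, num_bins)
        weights = torch.exp(-0.5 * (diff / bandwidth) ** 2)   # Gaussian kernel
        hist = weights.sum(dim=1)
        return hist / hist.sum(dim=1, keepdim=True)

    def entropy(p, eps=1e-8):
        return -(p * (p + eps).log()).sum(dim=1)              # (B,)

    def entropy_gap_loss(pred, target):
        """L1 gap between the KDE entropies of prediction and target."""
        return (entropy(soft_histogram(pred)) - entropy(soft_histogram(target))).abs().mean()

    pred = torch.rand(2, 1, 32, 32, requires_grad=True)
    target = torch.rand(2, 1, 32, 32)
    loss = entropy_gap_loss(pred, target)
    loss.backward()                                           # differentiable end to end
    print(float(loss))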
7. arXiv:2306.04032 [cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); eess.IV (Image and Video Processing)]
BokehOrNot: Transforming Bokeh Effect with Image Transformer and Lens Metadata Embedding
Authors: Zhihao Yang, Wenyi Lian, Siyuan Lai
Abstract: Bokeh effect is an optical phenomenon that offers a pleasant visual experience, typically generated by high-end cameras with wide aperture lenses. The task of bokeh effect transformation aims to produce a desired effect in one set of lenses and apertures based on another combination. Current models are limited in their ability to render a specific set of bokeh effects, primarily transformations from sharp to blur. In this paper, we propose a novel universal method for embedding lens metadata into the model and introducing a loss calculation method using alpha masks from the newly released Bokeh Effect Transformation Dataset (BETD) [3]. Based on the above techniques, we propose the BokehOrNot model, which is capable of producing both blur-to-sharp and sharp-to-blur bokeh effects with various combinations of lenses and aperture sizes. Our proposed model outperforms current leading bokeh rendering and image restoration models and renders visually natural bokeh effects. Our code is available at: https://github.com/indicator0/bokehornot.
Submitted 6 June, 2023; originally announced June 2023.
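Lens metadata embedding, as described in this abstract, generally means turning a few scalar lens attributes into a learned vector that conditions the image network. The sketch below shows one common conditioning pattern (an MLP embedding broadcast-added to a feature map); the metadata fields, dimensions, and conditioning scheme are assumptions for illustration, not the BokehOrNot architecture.

    import torch
    import torch.nn as nn

    class MetadataConditionedBlock(nn.Module):
        """Conv block whose features are shifted by an embedding of lens metadata.

        meta is a vector such as [src_f_number, tgt_f_number] (illustrative fields);
        the embedding is broadcast-added to the feature map, one simple way of
        injecting lens information into an image-to-image model."""
        def __init__(self, channels=32, meta_dim=2):
            super().__init__()
            self.conv = nn.Conv2d(3, channels, 3, padding=1)
            self.embed = nn.Sequential(nn.Linear(meta_dim, channels), nn.ReLU(),
                                       nn.Linear(channels, channels))

        def forward(self, img, meta):
            feat = self.conv(img)                                # (B, C, H, W)
            bias = self.embed(meta).unsqueeze(-1).unsqueeze(-1)  # (B, C, 1, 1)
            return torch.relu(feat + bias)

    block = MetadataConditionedBlock()
    img = torch.randn(4, 3, 64, 64)
    meta = torch.tensor([[1.8, 16.0]] * 4)   # e.g. f/1.8 source -> f/16 target
    print(block(img, meta).shape)            # torch.Size([4, 32, 64, 64])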
8. arXiv:2303.11765 [cs.RO (Robotics)]
Cable Routing and Assembly using Tactile-driven Motion Primitives
Authors: Achu Wilson, Helen Jiang, Wenzhao Lian, Wenzhen Yuan
Abstract: Manipulating cables is challenging for robots because of the infinite degrees of freedom of the cables and frequent occlusion by the gripper and the environment. These challenges are further complicated by the dexterous nature of the operations required for cable routing and assembly, such as weaving and inserting, hampering common solutions with vision-only sensing. In this paper, we propose to integrate tactile-guided low-level motion control with high-level vision-based task parsing for a challenging task: cable routing and assembly on a reconfigurable task board. Specifically, we build a library of tactile-guided motion primitives using a fingertip GelSight sensor, where each primitive reliably accomplishes an operation such as cable following and weaving. The overall task is inferred via visual perception given a goal configuration image, and then used to generate the primitive sequence. Experiments demonstrate the effectiveness of individual tactile-guided primitives and the integrated end-to-end solution, significantly outperforming the method without tactile sensing. Our reconfigurable task setup and proposed baselines provide a benchmark for future research in cable manipulation. More details and video are presented at https://helennn.github.io/cable-manip/
Submitted 21 March, 2023; originally announced March 2023.
9. arXiv:2212.00955 [cs.RO (Robotics)]
Prim-LAfD: A Framework to Learn and Adapt Primitive-Based Skills from Demonstrations for Insertion Tasks
Authors: Zheng Wu, Wenzhao Lian, Changhao Wang, Mengxi Li, Stefan Schaal, Masayoshi Tomizuka
Abstract: Learning generalizable insertion skills in a data-efficient manner has long been a challenge in the robot learning community. While the current state-of-the-art methods with reinforcement learning (RL) show promising performance in acquiring manipulation skills, the algorithms are data-hungry and hard to generalize. To overcome the issues, in this paper we present Prim-LAfD, a simple yet effective framework to learn and adapt primitive-based insertion skills from demonstrations. Prim-LAfD utilizes black-box function optimization to learn and adapt the primitive parameters leveraging prior experiences. Human demonstrations are modeled as dense rewards guiding parameter learning. We validate the effectiveness of the proposed method on eight peg-hole and connector-socket insertion tasks. The experimental results show that our proposed framework takes less than one hour to acquire the insertion skills and as few as fifteen minutes to adapt to an unseen insertion task on a physical robot.
Submitted 1 December, 2022; originally announced December 2022.
Comments: 6 pages, 4 figures
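Black-box optimization of primitive parameters, as described in the Prim-LAfD abstract above, can be illustrated with a minimal cross-entropy-method loop. The choice of optimizer, the placeholder demonstration-similarity reward, and the parameter dimensionality below are assumptions for illustration; the paper's reward model, primitive parameterization, and optimizer are not reproduced here.

    import numpy as np

    def cem_optimize(reward_fn, dim, iters=30, pop=64, elite_frac=0.2, seed=0):
        """Minimal cross-entropy method: one generic way to do black-box
        optimization of a parameter vector against a scalar reward."""
        rng = np.random.default_rng(seed)
        mean, std = np.zeros(dim), np.ones(dim)
        n_elite = max(1, int(pop * elite_frac))
        for _ in range(iters):
            samples = rng.normal(mean, std, size=(pop, dim))
            rewards = np.array([reward_fn(p) for p in samples])
            elite = samples[np.argsort(rewards)[-n_elite:]]   # keep best samples
            mean, std = elite.mean(axis=0), elite.std(axis=0) + 1e-3
        return mean

    # Placeholder dense reward: closeness of the parameters to a "demonstrated"
    # parameter vector (a stand-in for similarity to human demonstrations).
    demo = np.array([0.5, -1.2, 0.8])
    reward = lambda p: -np.linalg.norm(p - demo)
    print(np.round(cem_optimize(reward, dim=3), 2))           # approx. [ 0.5 -1.2  0.8]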
10. arXiv:2211.08199 [cs.RO (Robotics)]
Allowing Safe Contact in Robotic Goal-Reaching: Planning and Tracking in Operational and Null Spaces
Authors: Xinghao Zhu, Wenzhao Lian, Bodi Yuan, C. Daniel Freeman, Masayoshi Tomizuka
Abstract: In recent years, impressive results have been achieved in robotic manipulation. While many efforts focus on generating collision-free reference signals, few allow safe contact between the robot bodies and the environment. However, in humans' daily manipulation, contact between arms and obstacles is prevalent and even necessary. This paper investigates the benefit of allowing safe contact during robotic manipulation and advocates generating and tracking compliance reference signals in both operational and null spaces. In addition, to optimize the collision-allowed trajectories, we present a hybrid solver that integrates sampling- and gradient-based approaches. We evaluate the proposed method on a goal-reaching task in five simulated and real-world environments with different collisional conditions. We show that allowing safe contact improves goal-reaching efficiency and provides feasible solutions in highly collisional scenarios where collision-free constraints cannot be enforced. Moreover, we demonstrate that planning in null space, in addition to operational space, improves trajectory safety.
Submitted 31 October, 2022; originally announced November 2022.
Comments: 7 pages, 5 figures, submitted to ICRA 2023
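The operational-space/null-space split referred to in this title and abstract is the standard redundancy-resolution construction: track the task-space velocity through the Jacobian pseudoinverse and project any secondary motion through the null-space projector so that it cannot disturb the task. The toy Jacobian and velocities below are assumptions for illustration; the paper's compliance references and hybrid sampling/gradient solver are not shown.

    import numpy as np

    def resolve_joint_velocity(J, xdot_task, qdot_secondary):
        """Track a task-space velocity while projecting a secondary motion into
        the null space of the Jacobian (operational/null-space decomposition)."""
        J_pinv = np.linalg.pinv(J)
        N = np.eye(J.shape[1]) - J_pinv @ J          # null-space projector
        return J_pinv @ xdot_task + N @ qdot_secondary

    # Toy 3-DoF arm with a 2-D task: the secondary motion (e.g., moving the elbow
    # away from an obstacle) must not disturb the end-effector velocity.
    J = np.array([[1.0, 0.5, 0.0],
                  [0.0, 1.0, 0.5]])
    xdot = np.array([0.1, 0.0])
    qdot_posture = np.array([0.0, 0.0, 1.0])
    qdot = resolve_joint_velocity(J, xdot, qdot_posture)
    print(np.allclose(J @ qdot, xdot))               # True: task tracking preserved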
11. arXiv:2211.05256 [eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)]
Power Efficient Video Super-Resolution on Mobile NPUs with Deep Learning, Mobile AI & AIM 2022 challenge: Report
Authors: Andrey Ignatov, Radu Timofte, Cheng-Ming Chiang, Hsien-Kai Kuo, Yu-Syuan Xu, Man-Yu Lee, Allen Lu, Chia-Ming Cheng, Chih-Cheng Chen, Jia-Ying Yong, Hong-Han Shuai, Wen-Huang Cheng, Zhuang Jia, Tianyu Xu, Yijian Zhang, Long Bao, Heng Sun, Diankai Zhang, Si Gao, Shaoli Liu, Biao Wu, Xiaofeng Zhang, Chengjian Zheng, Kaidi Lu, Ning Wang, et al. (29 additional authors not shown)
Abstract: Video super-resolution is one of the most popular tasks on mobile devices, being widely used for an automatic improvement of low-bitrate and low-resolution video streams. While numerous solutions have been proposed for this problem, they are usually quite computationally demanding, demonstrating low FPS rates and power efficiency on mobile devices. In this Mobile AI challenge, we address this problem and ask the participants to design an end-to-end real-time video super-resolution solution for mobile NPUs optimized for low energy consumption. The participants were provided with the REDS training dataset containing video sequences for a 4X video upscaling task. The runtime and power efficiency of all models was evaluated on the powerful MediaTek Dimensity 9000 platform with a dedicated AI processing unit capable of accelerating floating-point and quantized neural networks. All proposed solutions are fully compatible with the above NPU, demonstrating an up to 500 FPS rate and 0.2 [Watt / 30 FPS] power consumption. A detailed description of all models developed in the challenge is provided in this paper.
Submitted 7 November, 2022; originally announced November 2022.
Comments: arXiv admin note: text overlap with arXiv:2105.08826, arXiv:2105.07809, arXiv:2211.04470, arXiv:2211.03885
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.00350v1-abstract-full').style.display = 'none'; document.getElementById('2210.00350v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.11608">arXiv:2208.11608</a> <span> [<a href="https://arxiv.org/pdf/2208.11608">pdf</a>, <a href="https://arxiv.org/format/2208.11608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Sliding Window Recurrent Network for Efficient Video Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lian%2C+W">Wenyi Lian</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+W">Wenjing Lian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.11608v1-abstract-short" style="display: inline;"> Video super-resolution (VSR) is the task of restoring high-resolution frames from a sequence of low-resolution inputs. Different from single image super-resolution, VSR can utilize frames' temporal information to reconstruct results with more details. Recently, with the rapid development of convolution neural networks (CNN), the VSR task has drawn increasing attention and many CNN-based methods ha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.11608v1-abstract-full').style.display = 'inline'; document.getElementById('2208.11608v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.11608v1-abstract-full" style="display: none;"> Video super-resolution (VSR) is the task of restoring high-resolution frames from a sequence of low-resolution inputs. Different from single image super-resolution, VSR can utilize frames' temporal information to reconstruct results with more details. Recently, with the rapid development of convolution neural networks (CNN), the VSR task has drawn increasing attention and many CNN-based methods have achieved remarkable results. However, only a few VSR approaches can be applied to real-world mobile devices due to the computational resources and runtime limitations. In this paper, we propose a \textit{Sliding Window based Recurrent Network} (SWRN) which can be real-time inference while still achieving superior performance. Specifically, we notice that video frames should have both spatial and temporal relations that can help to recover details, and the key point is how to extract and aggregate information. 
arXiv:2203.02468 (https://arxiv.org/abs/2203.02468) [pdf, other] cs.RO
Symbolic State Estimation with Predicates for Contact-Rich Manipulation Tasks
Authors: Toki Migimatsu, Wenzhao Lian, Jeannette Bohg, Stefan Schaal
Abstract: Manipulation tasks often require a robot to adjust its sensorimotor skills based on the state it finds itself in. Taking peg-in-hole as an example: once the peg is aligned with the hole, the robot should push the peg downwards. While high level execution frameworks such as state machines and behavior trees are commonly used to formalize such decision-making problems, these frameworks require a mechanism to detect the high-level symbolic state. Handcrafting heuristics to identify symbolic states can be brittle, and using data-driven methods can produce noisy predictions, particularly when working with limited datasets, as is common in real-world robotic scenarios. This paper proposes a Bayesian state estimation method to predict symbolic states with predicate classifiers. This method requires little training data and allows fusing noisy observations from multiple sensor modalities. We evaluate our framework on a set of real-world peg-in-hole and connector-socket insertion tasks, demonstrating its ability to classify symbolic states and to generalize to unseen tasks, outperforming baseline methods. We also demonstrate the ability of our method to improve the robustness of manipulation policies on a real robot.
Submitted 4 March, 2022; originally announced March 2022.
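The Bayesian fusion of noisy predicate classifiers mentioned above can be illustrated with a generic discrete Bayes filter; the predicate, confusion rates, and observations below are invented for illustration and are not taken from the paper:

```python
# Generic discrete Bayes-filter sketch for fusing noisy predicate classifiers
# (e.g., a hypothetical "peg_aligned" predicate) across sensor modalities.
def update_belief(belief_true, observations, true_positive_rate, false_positive_rate):
    """belief_true: prior P(predicate = True); observations: list of 0/1 classifier outputs."""
    p_true, p_false = belief_true, 1.0 - belief_true
    for z in observations:
        # Likelihood of this observation under each hypothesis.
        like_true = true_positive_rate if z else (1.0 - true_positive_rate)
        like_false = false_positive_rate if z else (1.0 - false_positive_rate)
        p_true *= like_true
        p_false *= like_false
    total = p_true + p_false
    return p_true / total  # posterior P(predicate = True)

# Fuse a vision classifier and a force/torque classifier that disagree.
posterior = update_belief(belief_true=0.5, observations=[1, 0],
                          true_positive_rate=0.9, false_positive_rate=0.2)
print(round(posterior, 3))  # 0.36: the strong negative evidence pulls the belief down
```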
arXiv:2201.12716 (https://arxiv.org/abs/2201.12716) [pdf, other] cs.RO cs.AI cs.CV eess.SY
You Only Demonstrate Once: Category-Level Manipulation from Single Visual Demonstration
Authors: Bowen Wen, Wenzhao Lian, Kostas Bekris, Stefan Schaal
Abstract: Promising results have been achieved recently in category-level manipulation that generalizes across object instances. Nevertheless, it often requires expensive real-world data collection and manual specification of semantic keypoints for each object category and task. Additionally, coarse keypoint predictions and ignoring intermediate action sequences hinder adoption in complex manipulation tasks beyond pick-and-place. This work proposes a novel, category-level manipulation framework that leverages an object-centric, category-level representation and model-free 6 DoF motion tracking. The canonical object representation is learned solely in simulation and then used to parse a category-level task trajectory from a single demonstration video. The demonstration is reprojected to a target trajectory tailored to a novel object via the canonical representation. During execution, the manipulation horizon is decomposed into long-range, collision-free motion and last-inch manipulation. For the latter part, a category-level behavior cloning (CatBC) method leverages motion tracking to perform closed-loop control. CatBC follows the target trajectory, projected from the demonstration and anchored to a dynamically selected category-level coordinate frame. The frame is automatically selected along the manipulation horizon by a local attention mechanism. This framework allows teaching different manipulation strategies by providing only a single demonstration, without complicated manual programming. Extensive experiments demonstrate its efficacy in a range of challenging industrial tasks in high-precision assembly, which involve learning complex, long-horizon policies. The process exhibits robustness against uncertainty due to dynamics as well as generalization across object instances and scene configurations. The supplementary video is available at https://www.youtube.com/watch?v=WAr8ZY3mYyw
Submitted 6 May, 2022; v1 submitted 29 January, 2022; originally announced January 2022.
Journal ref: Robotics: Science and Systems (RSS) 2022
arXiv:2112.07315 (https://arxiv.org/abs/2112.07315) [pdf, other] cs.CV cs.AI
Kernel-aware Burst Blind Super-Resolution
Authors: Wenyi Lian, Shanglian Peng
Abstract: Burst super-resolution (SR) provides a possibility of restoring rich details from low-quality images. However, since real-world low-resolution (LR) images in practical applications have multiple complicated and unknown degradations, existing networks designed for non-blind settings (e.g., bicubic downsampling) usually suffer a severe performance drop in recovering high-resolution (HR) images. In this paper, we address the problem of reconstructing HR images from raw burst sequences acquired from a modern handheld device. The central idea is a kernel-guided strategy which solves the burst SR problem in two steps: kernel estimation and HR image restoration. The former estimates burst kernels from the raw inputs, while the latter predicts the super-resolved image based on the estimated kernels. Furthermore, we introduce a pyramid kernel-aware deformable alignment module which can effectively align the raw images with consideration of the blurry priors. Extensive experiments on synthetic and real-world datasets demonstrate that the proposed method achieves favorable, state-of-the-art performance on the burst SR problem. Our code is available at https://github.com/shermanlian/KBNet.
Submitted 5 December, 2022; v1 submitted 14 December, 2021; originally announced December 2021.
Comments: Accepted by WACV 2023
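To make the two-step, kernel-guided strategy above concrete, here is a toy sketch in which one small network estimates a per-frame kernel and a second network restores the image conditioned on it; both modules are placeholders and bear no relation to the actual KBNet layers:

```python
# Toy sketch of the kernel-guided idea: step 1 estimates a per-frame blur kernel,
# step 2 conditions the restoration network on those kernels.
import torch
import torch.nn as nn

class KernelEstimator(nn.Module):
    def __init__(self, k=5):
        super().__init__()
        self.k = k
        self.net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                                 nn.AdaptiveAvgPool2d(1), nn.Flatten(),
                                 nn.Linear(16, k * k))

    def forward(self, frame):                       # frame: (B, 3, H, W)
        logits = self.net(frame)
        return torch.softmax(logits, dim=1).view(-1, 1, self.k, self.k)  # normalized kernel

class Restorer(nn.Module):
    def __init__(self, k=5, scale=4):
        super().__init__()
        self.body = nn.Sequential(nn.Conv2d(3 + k * k, 32, 3, padding=1), nn.ReLU(),
                                  nn.Conv2d(32, 3 * scale * scale, 3, padding=1),
                                  nn.PixelShuffle(scale))

    def forward(self, frame, kernel):
        # Broadcast the flattened kernel over the spatial grid as extra conditioning channels.
        b, _, h, w = frame.shape
        cond = kernel.flatten(1)[:, :, None, None].expand(b, -1, h, w)
        return self.body(torch.cat([frame, cond], dim=1))

burst = torch.rand(8, 3, 48, 48)                    # a toy burst of 8 frames
kernels = KernelEstimator()(burst)                  # step 1: per-frame kernel estimation
sr = Restorer()(burst, kernels).mean(dim=0)         # step 2: restore, then naively fuse
print(sr.shape)                                     # torch.Size([3, 192, 192])
```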
arXiv:2109.09163 (https://arxiv.org/abs/2109.09163) [pdf, other] cs.RO cs.AI cs.CV eess.SY
CaTGrasp: Learning Category-Level Task-Relevant Grasping in Clutter from Simulation
Authors: Bowen Wen, Wenzhao Lian, Kostas Bekris, Stefan Schaal
Abstract: Task-relevant grasping is critical for industrial assembly, where downstream manipulation tasks constrain the set of valid grasps. Learning how to perform this task, however, is challenging, since task-relevant grasp labels are hard to define and annotate. There is also no consensus yet on proper representations for modeling or off-the-shelf tools for performing task-relevant grasps. This work proposes a framework to learn task-relevant grasping for industrial objects without the need for time-consuming real-world data collection or manual annotation. To achieve this, the entire framework is trained solely in simulation, including supervised training with synthetic label generation and self-supervised hand-object interaction. In the context of this framework, this paper proposes a novel, object-centric canonical representation at the category level, which allows establishing dense correspondence across object instances and transferring task-relevant grasps to novel instances. Extensive experiments on task-relevant grasping of densely cluttered industrial objects are conducted in both simulation and real-world setups, demonstrating the effectiveness of the proposed framework. Code and data are available at https://sites.google.com/view/catgrasp.
Submitted 25 February, 2022; v1 submitted 19 September, 2021; originally announced September 2021.
Comments: IEEE International Conference on Robotics and Automation (ICRA) 2022
arXiv:2103.11512 (https://arxiv.org/abs/2103.11512) [pdf, other] cs.AI cs.RO
Robust Multi-Modal Policies for Industrial Assembly via Reinforcement Learning and Demonstrations: A Large-Scale Study
Authors: Jianlan Luo, Oleg Sushkov, Rugile Pevceviciute, Wenzhao Lian, Chang Su, Mel Vecerik, Ning Ye, Stefan Schaal, Jon Scholz
Abstract: Over the past several years there has been a considerable research investment into learning-based approaches to industrial assembly, but despite significant progress these techniques have yet to be adopted by industry. We argue that it is the prohibitively large design space for Deep Reinforcement Learning (DRL), rather than algorithmic limitations per se, that is truly responsible for this lack of adoption. Pushing these techniques into the industrial mainstream requires an industry-oriented paradigm which differs significantly from the academic mindset. In this paper we define criteria for industry-oriented DRL, and perform a thorough comparison according to these criteria of one family of learning approaches, DRL from demonstration, against a professional industrial integrator on the recently established NIST assembly benchmark. We explain the design choices, representing several years of investigation, which enabled our DRL system to consistently outperform the integrator baseline in terms of both speed and reliability. Finally, we conclude with a competition between our DRL system and a human on a challenge task of insertion into a randomly moving target. This study suggests that DRL is capable of outperforming not only established engineered approaches, but the human motor system as well, and that there remains significant room for improvement. Videos can be found on our project website: https://sites.google.com/view/shield-nist.
Submitted 31 July, 2021; v1 submitted 21 March, 2021; originally announced March 2021.
Comments: RSS 2021
arXiv:2103.05140 (https://arxiv.org/abs/2103.05140) [pdf, other] cs.RO cs.AI
Benchmarking Off-The-Shelf Solutions to Robotic Assembly Tasks
Authors: Wenzhao Lian, Tim Kelch, Dirk Holz, Adam Norton, Stefan Schaal
Abstract: In recent years, many learning-based approaches have been studied to realize robotic manipulation and assembly tasks, often including vision and force/tactile feedback. However, it frequently remains unclear what the baseline state-of-the-art performance is and what the bottleneck problems are. In this work, we evaluate some off-the-shelf (OTS) industrial solutions on a recently introduced benchmark, the National Institute of Standards and Technology (NIST) Assembly Task Boards. A set of assembly tasks are introduced and baseline methods are provided to understand their intrinsic difficulty. Multiple sensor-based robotic solutions are then evaluated, including hybrid force/motion control and 2D/3D pattern matching algorithms. An end-to-end integrated solution that accomplishes the tasks is also provided. The results and findings throughout the study reveal a few noticeable factors that impede the adoption of the OTS solutions: expertise dependence, limited applicability, lack of interoperability, no scene awareness or error recovery mechanisms, and high cost. This paper also provides a first attempt at an objective benchmark performance on the NIST Assembly Task Boards as a reference comparison for future work on this problem.
Submitted 8 March, 2021; originally announced March 2021.
Comments: 7 pages, 6 figures
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.05140v1-abstract-full').style.display = 'none'; document.getElementById('2103.05140v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.07458">arXiv:2101.07458</a> <span> [<a href="https://arxiv.org/pdf/2101.07458">pdf</a>, <a href="https://arxiv.org/format/2101.07458">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neucom.2023.126482">10.1016/j.neucom.2023.126482 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hybrid Trilinear and Bilinear Programming for Aligning Partially Overlapping Point Sets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lian%2C+W">Wei Lian</a>, <a href="/search/cs?searchtype=author&query=Zuo%2C+W">Wangmeng Zuo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.07458v3-abstract-short" style="display: inline;"> In many applications, we need algorithms which can align partially overlapping point sets and are invariant to the corresponding transformations. In this work, a method possessing such properties is realized by minimizing the objective of the robust point matching (RPM) algorithm. We first show that the RPM objective is a cubic polynomial. We then utilize the convex envelopes of trilinear and bili… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.07458v3-abstract-full').style.display = 'inline'; document.getElementById('2101.07458v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.07458v3-abstract-full" style="display: none;"> In many applications, we need algorithms which can align partially overlapping point sets and are invariant to the corresponding transformations. In this work, a method possessing such properties is realized by minimizing the objective of the robust point matching (RPM) algorithm. We first show that the RPM objective is a cubic polynomial. We then utilize the convex envelopes of trilinear and bilinear monomials to derive its lower bound function. The resulting lower bound problem has the merit that it can be efficiently solved via linear assignment and low dimensional convex quadratic programming. We next develop a branch-and-bound (BnB) algorithm which only branches over the transformation variables and runs efficiently. 
arXiv:2101.02725 (https://arxiv.org/abs/2101.02725) [pdf, other] cs.RO
Interpreting Contact Interactions to Overcome Failure in Robot Assembly Tasks
Authors: Peter A. Zachares, Michelle A. Lee, Wenzhao Lian, Jeannette Bohg
Abstract: A key challenge towards the goal of multi-part assembly tasks is finding robust sensorimotor control methods in the presence of uncertainty. In contrast to previous works that rely on a priori knowledge on whether two parts match, we aim to learn this through physical interaction. We propose a hierarchical approach that enables a robot to autonomously assemble parts while being uncertain about part types and positions. In particular, our probabilistic approach learns a set of differentiable filters that leverage the tactile sensorimotor trace from failed assembly attempts to update its belief about part position and type. This enables a robot to overcome assembly failure. We demonstrate the effectiveness of our approach on a set of object fitting tasks. The experimental results indicate that our proposed approach achieves higher precision in object position and type estimation, and accomplishes object fitting tasks faster than baselines.
Submitted 11 May, 2021; v1 submitted 7 January, 2021; originally announced January 2021.
arXiv:2011.08458 (https://arxiv.org/abs/2011.08458) [pdf, other] cs.RO
Learning Dense Rewards for Contact-Rich Manipulation Tasks
Authors: Zheng Wu, Wenzhao Lian, Vaibhav Unhelkar, Masayoshi Tomizuka, Stefan Schaal
Abstract: Rewards play a crucial role in reinforcement learning. To arrive at the desired policy, the design of a suitable reward function often requires significant domain expertise as well as trial-and-error. Here, we aim to minimize the effort involved in designing reward functions for contact-rich manipulation tasks. In particular, we provide an approach capable of extracting dense reward functions algorithmically from robots' high-dimensional observations, such as images and tactile feedback. In contrast to state-of-the-art high-dimensional reward learning methodologies, our approach does not leverage adversarial training, and is thus less prone to the associated training instabilities. Instead, our approach learns rewards by estimating task progress in a self-supervised manner. We demonstrate the effectiveness and efficiency of our approach on two contact-rich manipulation tasks, namely peg-in-hole and USB insertion. The experimental results indicate that the policies trained with the learned reward function achieve better performance and faster convergence compared to the baselines.
Submitted 17 November, 2020; originally announced November 2020.
Comments: 8 pages, 5 figures
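One generic way to realize "estimating task progress in a self-supervised manner" is to label each frame of a successful trajectory with its normalized time index, train a regressor on those labels, and reuse the predicted progress as a dense reward. The sketch below shows that generic recipe, not the paper's exact training procedure:

```python
# Dense reward from self-supervised task-progress estimation: frames of successful
# trajectories are labeled with their normalized time index, a regressor learns to
# predict that progress, and its output is reused as a per-step reward.
import torch
import torch.nn as nn

progress_net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 128),
                             nn.ReLU(), nn.Linear(128, 1), nn.Sigmoid())
optimizer = torch.optim.Adam(progress_net.parameters(), lr=1e-3)

def train_on_trajectory(frames):                    # frames: (T, 3, 32, 32) successful rollout
    T = frames.shape[0]
    targets = torch.arange(T, dtype=torch.float32).unsqueeze(1) / (T - 1)  # 0 ... 1
    pred = progress_net(frames)
    loss = nn.functional.mse_loss(pred, targets)
    optimizer.zero_grad(); loss.backward(); optimizer.step()
    return loss.item()

def dense_reward(observation):                      # reward for an RL step = predicted progress
    with torch.no_grad():
        return progress_net(observation.unsqueeze(0)).item()

demo = torch.rand(20, 3, 32, 32)                    # toy stand-in for camera observations
train_on_trajectory(demo)
print(dense_reward(demo[7]))
```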
arXiv:2007.02363 (https://arxiv.org/abs/2007.02363) [pdf, other] cs.CV
Aligning Partially Overlapping Point Sets: an Inner Approximation Algorithm
Authors: Wei Lian, WangMeng Zuo, Lei Zhang
Abstract: Aligning partially overlapping point sets where there is no prior information about the value of the transformation is a challenging problem in computer vision. To achieve this goal, we first reduce the objective of the robust point matching algorithm to a function of a low-dimensional variable. The resulting function, however, is only concave over a finite region including the feasible region. To cope with this issue, we employ the inner approximation optimization algorithm which only operates within the region where the objective function is concave. Our algorithm does not need regularization on the transformation, and thus can handle the situation where there is no prior information about the values of the transformations. Our method is also $ε$-globally optimal and thus is guaranteed to be robust. Moreover, its most computationally expensive subroutine is a linear assignment problem which can be efficiently solved. Experimental results demonstrate the better robustness of the proposed method over state-of-the-art algorithms. Our method is also efficient when the number of transformation parameters is small.
Submitted 5 July, 2020; originally announced July 2020.
arXiv:1907.03128 (https://arxiv.org/abs/1907.03128) [pdf, other] cs.CV eess.IV
Multi-level Wavelet Convolutional Neural Networks
Authors: Pengju Liu, Hongzhi Zhang, Wei Lian, Wangmeng Zuo
Abstract: In computer vision, convolutional networks (CNNs) often adopt pooling to enlarge the receptive field, which has the advantage of low computational complexity. However, pooling can cause information loss and is thus detrimental to further operations such as feature extraction and analysis. Recently, dilated filtering has been proposed to trade off between receptive field size and efficiency, but the accompanying gridding effect can cause a sparse sampling of input images with checkerboard patterns. To address this problem, in this paper we propose a novel multi-level wavelet CNN (MWCNN) model to achieve a better trade-off between receptive field size and computational efficiency. The core idea is to embed the wavelet transform into the CNN architecture to reduce the resolution of feature maps while, at the same time, increasing the receptive field. Specifically, MWCNN for image restoration is based on the U-Net architecture, and the inverse wavelet transform (IWT) is deployed to reconstruct the high-resolution (HR) feature maps. The proposed MWCNN can also be viewed as an improvement of dilated filtering and a generalization of average pooling, and can be applied not only to image restoration tasks but also to any CNN requiring a pooling operation. The experimental results demonstrate the effectiveness of the proposed MWCNN for tasks such as image denoising, single image super-resolution, JPEG image artifact removal and object classification.
Submitted 6 July, 2019; originally announced July 2019.
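The wavelet-as-downsampling idea is easy to make concrete: a one-level 2D Haar transform halves the spatial resolution while quadrupling the channels, and its inverse restores the input exactly. The snippet below shows only this standard transform pair; the CNN blocks that MWCNN places between the DWT and IWT are omitted, so this is not the paper's network:

```python
# One-level 2D Haar DWT turns a (C, H, W) feature map into a (4C, H/2, W/2) map
# (LL, LH, HL, HH subbands stacked as channels); the inverse transform restores it.
import torch

def haar_dwt(x):                                    # x: (B, C, H, W), H and W even
    a = x[:, :, 0::2, 0::2]                         # top-left samples of each 2x2 block
    b = x[:, :, 0::2, 1::2]                         # top-right
    c = x[:, :, 1::2, 0::2]                         # bottom-left
    d = x[:, :, 1::2, 1::2]                         # bottom-right
    ll = (a + b + c + d) / 2
    lh = (-a - b + c + d) / 2
    hl = (-a + b - c + d) / 2
    hh = (a - b - c + d) / 2
    return torch.cat([ll, lh, hl, hh], dim=1)       # channels grow 4x, resolution halves

def haar_iwt(y):
    ll, lh, hl, hh = torch.chunk(y, 4, dim=1)
    a = (ll - lh - hl + hh) / 2
    b = (ll - lh + hl - hh) / 2
    c = (ll + lh - hl - hh) / 2
    d = (ll + lh + hl + hh) / 2
    B, C, H, W = ll.shape
    out = torch.zeros(B, C, 2 * H, 2 * W)
    out[:, :, 0::2, 0::2], out[:, :, 0::2, 1::2] = a, b
    out[:, :, 1::2, 0::2], out[:, :, 1::2, 1::2] = c, d
    return out

x = torch.rand(1, 8, 32, 32)
assert torch.allclose(haar_iwt(haar_dwt(x)), x, atol=1e-6)   # perfect reconstruction
```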
arXiv:1705.06840 (https://arxiv.org/abs/1705.06840) [pdf, other] cs.AI cs.GT cs.MA
The Conference Paper Assignment Problem: Using Order Weighted Averages to Assign Indivisible Goods
Authors: Jing Wu Lian, Nicholas Mattei, Renee Noble, Toby Walsh
Abstract: Motivated by the common academic problem of allocating papers to referees for conference reviewing, we propose a novel mechanism for solving the assignment problem when we have a two-sided matching problem with preferences from one side (the agents/reviewers) over the other side (the objects/papers) and both sides have capacity constraints. The assignment problem is a fundamental problem in both computer science and economics with applications in many areas including task and resource allocation. We draw inspiration from multi-criteria decision making and voting and use order weighted averages (OWAs) to propose a novel and flexible class of algorithms for the assignment problem. We show an algorithm for finding a $Σ$-OWA assignment in polynomial time, in contrast to the NP-hardness of finding an egalitarian assignment. Inspired by this setting, we observe an interesting connection between our model and the classic proportional multi-winner election problem in social choice.
Submitted 18 May, 2017; originally announced May 2017.
Comments: 3 figures
MSC Class: 91A80; 91B74; ACM Class: J.4; I.2; G.1.6
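As a small numerical illustration of how an ordered weighted average scores an allocation (the sorting convention, weights, and utilities below are illustrative choices, not the paper's $Σ$-OWA definition):

```python
# Score an assignment by an ordered weighted average (OWA) of the agents' utilities:
# utilities are sorted ascending and dotted with a weight vector. Equal weights give
# the utilitarian sum; putting all weight on the first (smallest) entry gives the
# egalitarian objective.
def owa_score(utilities, weights):
    assert len(utilities) == len(weights)
    return sum(w * u for w, u in zip(weights, sorted(utilities)))

reviewer_utilities = [3, 7, 5]          # made-up utilities of one candidate assignment
print(owa_score(reviewer_utilities, [1, 1, 1]))        # utilitarian: 15
print(owa_score(reviewer_utilities, [1, 0, 0]))        # egalitarian: 3 (worst-off reviewer)
print(owa_score(reviewer_utilities, [0.5, 0.3, 0.2]))  # a fairness-leaning OWA: 4.4
```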
arXiv:1701.01035 (https://arxiv.org/abs/1701.01035) [pdf, other] cs.CV
Path-following based Point Matching using Similarity Transformation
Authors: Wei Lian
Abstract: To address the problem of 3D point matching where the poses of two point sets are unknown, we adapt a recently proposed path following based method to use similarity transformation instead of the original affine transformation. The reduced number of transformation parameters leads to more constrained and desirable matching results. Experimental results demonstrate better robustness of the proposed method over state-of-the-art methods.
Submitted 4 January, 2017; originally announced January 2017.
arXiv:1701.00951 (https://arxiv.org/abs/1701.00951) [pdf, other] cs.CV
A Concave Optimization Algorithm for Matching Partially Overlapping Point Sets
Authors: Wei Lian, Lei Zhang
Abstract: Point matching refers to the process of finding spatial transformation and correspondences between two sets of points. In this paper, we focus on the case that there is only partial overlap between two point sets. Following the approach of the robust point matching method, we model point matching as a mixed linear assignment-least square problem and show that, after eliminating the transformation variable, the resulting problem of minimization with respect to point correspondence is a concave optimization problem. Furthermore, this problem has the property that the objective function can be converted into a form with few nonlinear terms via a linear transformation. Based on these properties, we employ the branch-and-bound (BnB) algorithm to optimize the resulting problem, where the dimension of the search space is small. To further improve the efficiency of the BnB algorithm, where computation of the lower bound is the bottleneck, we propose a new lower bounding scheme which has a k-cardinality linear assignment formulation and can be efficiently solved. Experimental results show that the proposed algorithm outperforms state-of-the-art methods in terms of robustness to disturbances and point matching accuracy.
Submitted 4 January, 2017; originally announced January 2017.
Following the approach of the robust point matching method, we model point matching as a mixed linear assignment-least square problem and show that after eliminating the transformation variable, the resulting problem of minimization with respect to point correspondence is a concave optimization problem. Furthermore, this problem has the property that the objective function can be converted into a form with few nonlinear terms via a linear transformation. Based on these properties, we employ the branch-and-bound (BnB) algorithm to optimize the resulting problem where the dimension of the search space is small. To further improve efficiency of the BnB algorithm where computation of the lower bound is the bottleneck, we propose a new lower bounding scheme which has a k-cardinality linear assignment formulation and can be efficiently solved. Experimental results show that the proposed algorithm outperforms state-of-the-art methods in terms of robustness to disturbances and point matching accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1701.00951v1-abstract-full').style.display = 'none'; document.getElementById('1701.00951v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1507.01073">arXiv:1507.01073</a> <span> [<a href="https://arxiv.org/pdf/1507.01073">pdf</a>, <a href="https://arxiv.org/ps/1507.01073">ps</a>, <a href="https://arxiv.org/format/1507.01073">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Convex Factorization Machine for Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yamada%2C+M">Makoto Yamada</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+W">Wenzhao Lian</a>, <a href="/search/cs?searchtype=author&query=Goyal%2C+A">Amit Goyal</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianhui Chen</a>, <a href="/search/cs?searchtype=author&query=Wimalawarne%2C+K">Kishan Wimalawarne</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+S+A">Suleiman A Khan</a>, <a href="/search/cs?searchtype=author&query=Kaski%2C+S">Samuel Kaski</a>, <a href="/search/cs?searchtype=author&query=Mamitsuka%2C+H">Hiroshi Mamitsuka</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+Y">Yi Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1507.01073v5-abstract-short" style="display: inline;"> We propose the convex factorization machine (CFM), which is a convex variant of the widely used Factorization Machines (FMs). Specifically, we employ a linear+quadratic model and regularize the linear term with the $\ell_2$-regularizer and the quadratic term with the trace norm regularizer. 
arXiv:1507.01073 (https://arxiv.org/abs/1507.01073) [pdf, ps, other] stat.ML cs.LG
Convex Factorization Machine for Regression
Authors: Makoto Yamada, Wenzhao Lian, Amit Goyal, Jianhui Chen, Kishan Wimalawarne, Suleiman A Khan, Samuel Kaski, Hiroshi Mamitsuka, Yi Chang
Abstract: We propose the convex factorization machine (CFM), which is a convex variant of the widely used Factorization Machines (FMs). Specifically, we employ a linear+quadratic model and regularize the linear term with the $\ell_2$-regularizer and the quadratic term with the trace norm regularizer. Then, we formulate the CFM optimization as a semidefinite programming problem and propose an efficient optimization procedure with Hazan's algorithm. A key advantage of CFM over existing FMs is that it can find a globally optimal solution, while FMs may get stuck in a poor locally optimal solution since the objective function of FMs is non-convex. In addition, the proposed algorithm is simple yet effective and can be implemented easily. Finally, CFM is a general factorization method and can also be used for other factorization problems, including multi-view matrix factorization and tensor completion. On synthetic and MovieLens datasets, we first show that the proposed CFM achieves results competitive with FMs. Furthermore, in a toxicogenomics prediction task, we show that CFM outperforms a state-of-the-art tensor factorization method.
Submitted 9 August, 2016; v1 submitted 4 July, 2015; originally announced July 2015.
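The abstract above casts CFM as trace-norm-regularized (equivalently, semidefinite) optimization solved with Hazan's algorithm, i.e. a Frank-Wolfe method whose linear minimization step only requires a leading eigenvector. The snippet below is a rough NumPy sketch of that idea for squared loss with the quadratic part constrained to a PSD trace ball; it ignores the paper's exact parameterization (for example, how the linear term and the diagonal of the interaction matrix are handled), and the hyperparameters and names are illustrative, not taken from the paper.

```python
import numpy as np

def cfm_quadratic_fw(X, y, eta=10.0, iters=500):
    """Frank-Wolfe (Hazan-style) fit of f(x) = x^T Z x -- illustrative sketch.

    Z is constrained to the PSD trace ball {Z >= 0, tr(Z) <= eta}, a
    constrained stand-in for trace-norm regularization of the quadratic term.
    eta and iters are illustrative hyperparameters, not from the paper.
    """
    n, d = X.shape
    Z = np.zeros((d, d))
    for t in range(iters):
        pred = np.einsum('ij,jk,ik->i', X, Z, X)   # x_i^T Z x_i for each sample
        resid = pred - y
        grad = X.T @ (resid[:, None] * X) / n      # gradient of 0.5 * mean squared error
        grad = 0.5 * (grad + grad.T)               # numerical symmetrization
        # Linear minimization oracle over the PSD trace ball: <grad, S> is
        # minimized by eta * u u^T, u the eigenvector of grad with the most
        # negative eigenvalue (or by S = 0 if grad has no negative eigenvalue).
        vals, vecs = np.linalg.eigh(grad)
        u = vecs[:, 0]
        S = eta * np.outer(u, u) if vals[0] < 0 else np.zeros((d, d))
        gamma = 2.0 / (t + 2)                      # standard Frank-Wolfe step size
        Z = (1 - gamma) * Z + gamma * S
    return Z

# Toy usage: approximately recover a low-rank PSD interaction matrix.
rng = np.random.default_rng(2)
d, n = 8, 500
B = rng.normal(size=(d, 2))
Z_true = B @ B.T                                   # rank-2 PSD ground truth
X = rng.normal(size=(n, d))
y = np.einsum('ij,jk,ik->i', X, Z_true, X)
Z_hat = cfm_quadratic_fw(X, y, eta=np.trace(Z_true), iters=500)
print(np.linalg.norm(Z_hat - Z_true) / np.linalg.norm(Z_true))
```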