Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–27 of 27 results for author: <span class="mathjax">Rosin, P L</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Rosin%2C+P+L">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Rosin, P L"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Rosin%2C+P+L&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Rosin, P L"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19227">arXiv:2501.19227</a> <span> [<a href="https://arxiv.org/pdf/2501.19227">pdf</a>, <a href="https://arxiv.org/format/2501.19227">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Integrating Semi-Supervised and Active Learning for Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wanli Ma</a>, <a href="/search/cs?searchtype=author&query=Karakus%2C+O">Oktay Karakus</a>, <a href="/search/cs?searchtype=author&query=Rosin%2C+P+L">Paul L. Rosin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19227v1-abstract-short" style="display: inline;"> In this paper, we propose a novel active learning approach integrated with an improved semi-supervised learning framework to reduce the cost of manual annotation and enhance model performance. Our proposed approach effectively leverages both the labelled data selected through active learning and the unlabelled data excluded from the selection process. The proposed active learning approach pinpoint… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19227v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19227v1-abstract-full" style="display: none;"> In this paper, we propose a novel active learning approach integrated with an improved semi-supervised learning framework to reduce the cost of manual annotation and enhance model performance. Our proposed approach effectively leverages both the labelled data selected through active learning and the unlabelled data excluded from the selection process. The proposed active learning approach pinpoints areas where the pseudo-labels are likely to be inaccurate. Then, an automatic and efficient pseudo-label auto-refinement (PLAR) module is proposed to correct pixels with potentially erroneous pseudo-labels by comparing their feature representations with those of labelled regions. This approach operates without increasing the labelling budget and is based on the cluster assumption, which states that pixels belonging to the same class should exhibit similar representations in feature space. 
Furthermore, manual labelling is only applied to the most difficult and uncertain areas in unlabelled data, where insufficient information prevents the PLAR module from making a decision. We evaluated the proposed hybrid semi-supervised active learning framework on two benchmark datasets, one from natural and the other from remote sensing imagery domains. In both cases, it outperformed state-of-the-art methods in the semantic segmentation task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19227v1-abstract-full').style.display = 'none'; document.getElementById('2501.19227v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05265">arXiv:2501.05265</a> <span> [<a href="https://arxiv.org/pdf/2501.05265">pdf</a>, <a href="https://arxiv.org/format/2501.05265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Patch-GAN Transfer Learning with Reconstructive Models for Cloud Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wanli Ma</a>, <a href="/search/cs?searchtype=author&query=Karakus%2C+O">Oktay Karakus</a>, <a href="/search/cs?searchtype=author&query=Rosin%2C+P+L">Paul L. Rosin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05265v1-abstract-short" style="display: inline;"> Cloud removal plays a crucial role in enhancing remote sensing image analysis, yet accurately reconstructing cloud-obscured regions remains a significant challenge. Recent advancements in generative models have made the generation of realistic images increasingly accessible, offering new opportunities for this task. Given the conceptual alignment between image generation and cloud removal tasks, g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05265v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05265v1-abstract-full" style="display: none;"> Cloud removal plays a crucial role in enhancing remote sensing image analysis, yet accurately reconstructing cloud-obscured regions remains a significant challenge. Recent advancements in generative models have made the generation of realistic images increasingly accessible, offering new opportunities for this task. Given the conceptual alignment between image generation and cloud removal tasks, generative models present a promising approach for addressing cloud removal in remote sensing. 
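The PLAR module described above leans on the cluster assumption: a pixel's pseudo-label can be checked against the labelled pixels nearest to it in feature space. Below is a minimal sketch of that idea, not the authors' code; the prototype construction, the cosine-similarity test and the 0.9 threshold are all illustrative assumptions.

```python
import numpy as np

def refine_pseudo_labels(feats, pseudo, proto, sim_thresh=0.9):
    """Prototype-based pseudo-label refinement (illustrative sketch).

    feats:  (H, W, D) per-pixel feature vectors
    pseudo: (H, W)    integer pseudo-labels from the teacher
    proto:  (C, D)    mean feature of labelled pixels per class
    Returns refined labels, with -1 marking pixels left to manual labelling.
    """
    f = feats / np.linalg.norm(feats, axis=-1, keepdims=True)
    p = proto / np.linalg.norm(proto, axis=-1, keepdims=True)
    sim = f @ p.T                      # (H, W, C) cosine similarity to each class
    best = sim.argmax(-1)              # class whose prototype each pixel is closest to
    conf = sim.max(-1)

    refined = pseudo.copy()
    relabel = (best != pseudo) & (conf >= sim_thresh)
    refined[relabel] = best[relabel]   # cluster assumption violated: correct the label
    refined[conf < sim_thresh] = -1    # too uncertain: defer to manual annotation
    return refined
```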
2. arXiv:2501.05265 [pdf, other] cs.CV, eess.IV
Patch-GAN Transfer Learning with Reconstructive Models for Cloud Removal
Authors: Wanli Ma, Oktay Karakus, Paul L. Rosin
Abstract: Cloud removal plays a crucial role in enhancing remote sensing image analysis, yet accurately reconstructing cloud-obscured regions remains a significant challenge. Recent advancements in generative models have made the generation of realistic images increasingly accessible, offering new opportunities for this task. Given the conceptual alignment between image generation and cloud removal tasks, generative models present a promising approach for addressing cloud removal in remote sensing. In this work, we propose a deep transfer learning approach built on a generative adversarial network (GAN) framework to explore the potential of the novel masked autoencoder (MAE) image reconstruction model in cloud removal. Due to the complexity of remote sensing imagery, we further propose using a patch-wise discriminator to determine whether each patch of the image is real or not. The proposed reconstructive transfer learning approach demonstrates significant improvements in cloud removal performance compared to other GAN-based methods. Additionally, whilst direct comparisons with some of the state-of-the-art cloud removal techniques are limited due to unclear details regarding their train/test data splits, the proposed model achieves competitive results based on available benchmarks.
Submitted 9 January, 2025; originally announced January 2025.
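A patch-wise discriminator of the kind this abstract mentions is commonly built as a fully convolutional "PatchGAN": rather than one real/fake score per image, it emits a grid of scores, one per local patch. A generic sketch under that assumption follows; the layer sizes and channel counts are placeholders, not the paper's architecture.

```python
import torch
import torch.nn as nn

class PatchDiscriminator(nn.Module):
    """PatchGAN-style discriminator (illustrative sketch, not the paper's model).

    The fully convolutional stack outputs a grid of logits, one per
    receptive-field patch, so the adversarial loss penalises unrealistic
    local texture anywhere in the reconstructed image.
    """
    def __init__(self, in_ch=3, base=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, base, 4, stride=2, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(base, base * 2, 4, stride=2, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(base * 2, base * 4, 4, stride=2, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(base * 4, 1, 4, stride=1, padding=1),  # one logit per patch
        )

    def forward(self, x):
        return self.net(x)  # (B, 1, H', W') patch logits

logits = PatchDiscriminator()(torch.randn(1, 3, 256, 256))
print(logits.shape)  # each spatial cell judges one local patch
```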
3. arXiv:2412.18933 [pdf, other] cs.CV, cs.MM, eess.IV
TINQ: Temporal Inconsistency Guided Blind Video Quality Assessment
Authors: Yixiao Li, Xiaoyuan Yang, Weide Liu, Xin Jin, Xu Jia, Yukun Lai, Haotao Liu, Paul L Rosin, Wei Zhou
Abstract: Blind video quality assessment (BVQA) has been actively researched for user-generated content (UGC) videos. Recently, super-resolution (SR) techniques have been widely applied in UGC. Therefore, an effective BVQA method for both UGC and SR scenarios is essential. Temporal inconsistency, referring to irregularities between consecutive frames, is relevant to video quality. Current BVQA approaches typically model temporal relationships in UGC videos using statistics of motion information, but inconsistencies remain unexplored. Additionally, different from temporal inconsistency in UGC videos, such inconsistency in SR videos is amplified due to upscaling algorithms. In this paper, we introduce the Temporal Inconsistency Guided Blind Video Quality Assessment (TINQ) metric, demonstrating that exploring temporal inconsistency is crucial for effective BVQA. Since temporal inconsistencies vary between UGC and SR videos, they are calculated in different ways. Based on this, a spatial module highlights inconsistent areas across consecutive frames at coarse and fine granularities. In addition, a temporal module aggregates features over time in two stages. The first stage employs a visual memory capacity block to adaptively segment the time dimension based on estimated complexity, while the second stage focuses on selecting key features. The stages work together through Consistency-aware Fusion Units to regress cross-time-scale video quality. Extensive experiments on UGC and SR video quality datasets show that our method outperforms existing state-of-the-art BVQA methods. Code is available at https://github.com/Lighting-YXLI/TINQ.
Submitted 25 December, 2024; originally announced December 2024.
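As a rough intuition for "temporal inconsistency", the sketch below scores block-wise differences between consecutive frames. TINQ's actual UGC- and SR-specific computations and learned modules are defined in the paper, so everything here (the block size, the plain absolute difference) is an illustrative stand-in.

```python
import numpy as np

def inconsistency_maps(frames, block=8):
    """Crude temporal-inconsistency measure between consecutive frames.

    frames: (T, H, W) grayscale video, float in [0, 1]
    Returns per-frame-pair maps of block-wise mean absolute difference,
    where high values flag temporally inconsistent regions.
    """
    T, H, W = frames.shape
    diff = np.abs(np.diff(frames, axis=0))            # (T-1, H, W) frame residuals
    h, w = H // block, W // block
    d = diff[:, :h * block, :w * block]
    d = d.reshape(T - 1, h, block, w, block).mean(axis=(2, 4))
    return d                                          # coarse inconsistency grid

video = np.random.rand(10, 64, 64)
print(inconsistency_maps(video).shape)                # (9, 8, 8)
```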
4. arXiv:2410.16418 [pdf, other] cs.CV
AttentionPainter: An Efficient and Adaptive Stroke Predictor for Scene Painting
Authors: Yizhe Tang, Yue Wang, Teng Hu, Ran Yi, Xin Tan, Lizhuang Ma, Yu-Kun Lai, Paul L. Rosin
Abstract: Stroke-based Rendering (SBR) aims to decompose an input image into a sequence of parameterized strokes, which can be rendered into a painting that resembles the input image. Recently, Neural Painting methods that utilize deep learning and reinforcement learning models to predict the stroke sequences have been developed, but they suffer from long inference times or unstable training. To address these issues, we propose AttentionPainter, an efficient and adaptive model for single-step neural painting. First, we propose a novel scalable stroke predictor, which predicts a large number of stroke parameters within a single forward process, instead of the iterative prediction of previous Reinforcement Learning or auto-regressive methods, which makes AttentionPainter faster than previous neural painting methods. To further increase the training efficiency, we propose a Fast Stroke Stacking algorithm, which brings a 13-fold acceleration for training. Moreover, we propose a Stroke-density Loss, which encourages the model to use small strokes for detailed information, to help improve the reconstruction quality. Finally, we propose a new stroke diffusion model for both conditional and unconditional stroke-based generation, which denoises in the stroke parameter space and facilitates stroke-based inpainting and editing applications helpful for human artists' design work. Extensive experiments show that AttentionPainter outperforms the state-of-the-art neural painting methods.
Submitted 25 October, 2024; v1 submitted 21 October, 2024; originally announced October 2024.
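The core efficiency claim is that all stroke parameters come from a single forward pass. A toy query-based predictor in that spirit is sketched below; the 13-parameter stroke format, the attention layout and the layer sizes are assumptions for the demo, not AttentionPainter's design.

```python
import torch
import torch.nn as nn

class StrokePredictor(nn.Module):
    """Single-forward-pass stroke prediction (illustrative sketch).

    Learned stroke queries attend to image feature tokens and emit all
    stroke parameters at once, instead of one stroke per step; a 13-value
    stroke (position, size, rotation, colour, ...) is assumed here.
    """
    def __init__(self, n_strokes=64, dim=256, n_params=13):
        super().__init__()
        self.queries = nn.Parameter(torch.randn(n_strokes, dim))  # one query per stroke
        self.attn = nn.MultiheadAttention(dim, num_heads=8, batch_first=True)
        self.head = nn.Linear(dim, n_params)

    def forward(self, feat_tokens):                  # (B, L, dim) image feature tokens
        q = self.queries.expand(feat_tokens.size(0), -1, -1)
        out, _ = self.attn(q, feat_tokens, feat_tokens)
        return self.head(out).sigmoid()              # (B, n_strokes, n_params) in [0, 1]

params = StrokePredictor()(torch.randn(2, 196, 256))
print(params.shape)  # all strokes for each image from one forward pass
```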
5. arXiv:2406.09794 [pdf, other] cs.CV
SuperSVG: Superpixel-based Scalable Vector Graphics Synthesis
Authors: Teng Hu, Ran Yi, Baihong Qian, Jiangning Zhang, Paul L. Rosin, Yu-Kun Lai
Abstract: SVG (Scalable Vector Graphics) is a widely used graphics format that possesses excellent scalability and editability. Image vectorization, which aims to convert raster images to SVGs, is an important yet challenging problem in computer vision and graphics. Existing image vectorization methods either suffer from low reconstruction accuracy for complex images or require long computation time. To address this issue, we propose SuperSVG, a superpixel-based vectorization model that achieves fast and high-precision image vectorization. Specifically, we decompose the input image into superpixels to help the model focus on areas with similar colors and textures. Then, we propose a two-stage self-training framework, where a coarse-stage model is employed to reconstruct the main structure and a refinement-stage model is used for enriching the details. Moreover, we propose a novel dynamic path warping loss to help the refinement-stage model to inherit knowledge from the coarse-stage model. Extensive qualitative and quantitative experiments demonstrate the superior performance of our method in terms of reconstruction accuracy and inference time compared to state-of-the-art approaches. The code is available at https://github.com/sjtuplayer/SuperSVG.
Submitted 14 June, 2024; originally announced June 2024.
Comments: CVPR 2024
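The first step, decomposing the image into superpixels so each region groups similar colours and textures, can be sketched with an off-the-shelf algorithm such as SLIC. Whether SuperSVG uses SLIC specifically is not stated in the abstract, so treat this purely as an illustration; the image and parameter values are placeholders.

```python
import numpy as np
from skimage.segmentation import slic

# Illustrative first step only: a superpixel decomposition in which each
# region groups pixels of similar colour/texture. skimage's SLIC stands
# in here for whatever algorithm the paper actually uses.
image = np.random.rand(128, 128, 3)
labels = slic(image, n_segments=200, compactness=10.0, channel_axis=-1)

# One mean colour per superpixel: the kind of compact per-region summary
# a coarse vectorization stage could fit SVG paths to.
for sp in np.unique(labels)[:3]:
    mask = labels == sp
    print(sp, image[mask].mean(axis=0))
```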
6. arXiv:2404.18089 [pdf, other] cs.MA
Asymmetric Information Enhanced Mapping Framework for Multirobot Exploration based on Deep Reinforcement Learning
Authors: Jiyu Cheng, Junhui Fan, Xiaolei Li, Paul L. Rosin, Yibin Li, Wei Zhang
Abstract: Despite the great development of multirobot technologies, efficiently and collaboratively exploring an unknown environment is still a big challenge. In this paper, we propose AIM-Mapping, an Asymmetric InforMation enhanced Mapping framework. The framework fully utilizes privileged information during training to help construct the environment representation as well as the supervised signal in an asymmetric actor-critic training framework. Specifically, privileged information is used to evaluate the exploration performance through an asymmetric feature representation module and a mutual information evaluation module. The decision-making network uses the trained feature encoder to extract structure information from the environment and combines it with a topological map constructed based on geometric distance. Utilizing this topological map representation, we employ topological graph matching to assign corresponding boundary points to each robot as long-term goal points. We conduct experiments in real-world-like scenarios using the Gibson simulation environments. The results validate that the proposed method achieves a substantial performance improvement over existing methods.
Submitted 15 November, 2024; v1 submitted 28 April, 2024; originally announced April 2024.
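The asymmetric actor-critic pattern the abstract refers to is simple to state in code: the critic consumes privileged, simulator-only information while the actor sees only the robot's own observation, so only the actor is needed at deployment. A generic miniature follows, not AIM-Mapping's networks; all sizes are arbitrary.

```python
import torch
import torch.nn as nn

# Asymmetric actor-critic in miniature: the critic gets privileged state
# (e.g. ground-truth map information available only in the simulator),
# the actor gets only the robot's own observation.
obs_dim, priv_dim, act_dim = 32, 16, 4

actor = nn.Sequential(nn.Linear(obs_dim, 64), nn.Tanh(), nn.Linear(64, act_dim))
critic = nn.Sequential(nn.Linear(obs_dim + priv_dim, 64), nn.Tanh(), nn.Linear(64, 1))

obs = torch.randn(8, obs_dim)             # what the robot can sense
priv = torch.randn(8, priv_dim)           # privileged info, training time only
action_logits = actor(obs)                # deployable policy
value = critic(torch.cat([obs, priv], dim=-1))  # privileged value estimate
print(action_logits.shape, value.shape)
```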
7. arXiv:2403.15139 [pdf, other] cs.CV, eess.IV
Deep Generative Model based Rate-Distortion for Image Downscaling Assessment
Authors: Yuanbang Liang, Bhavesh Garg, Paul L Rosin, Yipeng Qin
Abstract: In this paper, we propose Image Downscaling Assessment by Rate-Distortion (IDA-RD), a novel measure to quantitatively evaluate image downscaling algorithms. In contrast to image-based methods that measure the quality of downscaled images, ours is process-based, drawing ideas from rate-distortion theory to measure the distortion incurred during downscaling. Our main idea is that downscaling and super-resolution (SR) can be viewed as the encoding and decoding processes in the rate-distortion model, respectively, and that a downscaling algorithm that preserves more details in the resulting low-resolution (LR) images should lead to less distorted high-resolution (HR) images in SR. In other words, the distortion should increase as the downscaling algorithm deteriorates. However, it is non-trivial to measure this distortion as it requires the SR algorithm to be blind and stochastic. Our key insight is that such requirements can be met by recent SR algorithms based on deep generative models that can find all matching HR images for a given LR image on their learned image manifolds. Extensive experimental results show the effectiveness of our IDA-RD measure.
Submitted 22 March, 2024; originally announced March 2024.
Comments: Accepted at CVPR 2024
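The measure can be paraphrased as: encode with the downscaler, decode with a blind stochastic SR model, and report the expected reconstruction distortion. The toy sketch below uses stand-in components (box-filter downscaling, a noisy nearest-neighbour "SR", plain MSE), none of which are the paper's actual ingredients.

```python
import numpy as np

def ida_rd_score(hr_images, downscale, sr_sample, n_draws=8):
    """Rate-distortion-style downscaling score (conceptual sketch).

    Treats `downscale` as the encoder and a stochastic blind SR model
    `sr_sample` as the decoder: the worse the downscaler, the more the
    SR reconstructions drift from the originals.
    """
    dists = []
    for hr in hr_images:
        lr = downscale(hr)
        # average distortion over several stochastic SR reconstructions
        dists += [np.mean((sr_sample(lr) - hr) ** 2) for _ in range(n_draws)]
    return float(np.mean(dists))  # higher = more distortion = worse downscaler

# toy stand-ins: 2x box-filter downscale and a noisy nearest-neighbour "SR"
down = lambda x: x.reshape(32, 2, 32, 2).mean(axis=(1, 3))
sr = lambda y: np.kron(y, np.ones((2, 2))) + 0.01 * np.random.randn(64, 64)
print(ida_rd_score([np.random.rand(64, 64) for _ in range(4)], down, sr))
```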
8. arXiv:2402.05305 [pdf, other] cs.CV
Knowledge Distillation for Road Detection based on cross-model Semi-Supervised Learning
Authors: Wanli Ma, Oktay Karakus, Paul L. Rosin
Abstract: The advancement of knowledge distillation has played a crucial role in enabling the transfer of knowledge from larger teacher models to smaller and more efficient student models, and is particularly beneficial for online and resource-constrained applications. The effectiveness of the student model heavily relies on the quality of the distilled knowledge received from the teacher. Given the accessibility of unlabelled remote sensing data, semi-supervised learning has become a prevalent strategy for enhancing model performance. However, relying solely on semi-supervised learning with smaller models may be insufficient due to their limited capacity for feature extraction. This limitation restricts their ability to exploit training data. To address this issue, we propose an integrated approach that combines knowledge distillation and semi-supervised learning methods. This hybrid approach leverages the robust capabilities of large models to effectively utilise large unlabelled data whilst subsequently providing the small student model with rich and informative features for enhancement. The proposed semi-supervised learning-based knowledge distillation (SSLKD) approach demonstrates a notable improvement in the performance of the student model, in the application of road segmentation, surpassing the effectiveness of traditional semi-supervised learning methods.
Submitted 25 March, 2024; v1 submitted 7 February, 2024; originally announced February 2024.
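A common way to combine the two ingredients is supervised cross-entropy on labelled pixels plus a soft-target distillation term from the teacher everywhere else. The sketch below is that generic recipe standing in for SSLKD; the loss weighting, the temperature and the -1 "unlabelled" convention are assumptions rather than the paper's exact formulation.

```python
import torch
import torch.nn.functional as F

def sslkd_loss(student_logits, teacher_logits, labels, alpha=0.5, tau=2.0):
    """Supervised + distillation loss for a segmentation student (sketch).

    Labelled pixels get ordinary cross-entropy; the teacher (itself trained
    semi-supervised on unlabelled imagery) supervises the student everywhere
    via temperature-softened targets. `labels` uses -1 for unlabelled pixels.
    """
    b, c, h, w = student_logits.shape
    s = student_logits.permute(0, 2, 3, 1).reshape(-1, c)
    t = teacher_logits.permute(0, 2, 3, 1).reshape(-1, c)
    y = labels.reshape(-1)

    ce = F.cross_entropy(s[y >= 0], y[y >= 0]) if (y >= 0).any() else s.sum() * 0
    kd = F.kl_div(F.log_softmax(s / tau, dim=1),
                  F.softmax(t / tau, dim=1), reduction="batchmean") * tau * tau
    return alpha * ce + (1 - alpha) * kd

loss = sslkd_loss(torch.randn(2, 5, 16, 16), torch.randn(2, 5, 16, 16),
                  torch.randint(-1, 5, (2, 16, 16)))
print(loss.item())
```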
9. arXiv:2311.13716 [pdf, other] cs.CV
DiverseNet: Decision Diversified Semi-supervised Semantic Segmentation Networks for Remote Sensing Imagery
Authors: Wanli Ma, Oktay Karakus, Paul L. Rosin
Abstract: Semi-supervised learning aims to help reduce the cost of the manual labelling process by leveraging valuable features extracted from a substantial pool of unlabelled data alongside a limited set of labelled data during the training phase. Since pixel-level manual labelling in large-scale remote sensing imagery is expensive, semi-supervised learning becomes an appropriate solution. However, most existing consistency-learning frameworks based on network perturbation are very bulky. There is still a lack of lightweight and efficient perturbation methods to promote the diversity of features and the precision of pseudo-labels during training. In order to fill this gap, we propose DiverseNet, which explores multi-head and multi-model semi-supervised learning algorithms by simultaneously enhancing precision and diversity during training. The two proposed methods in the DiverseNet family, namely DiverseHead and DiverseModel, both achieve better semantic segmentation performance on four widely used remote sensing imagery datasets compared to state-of-the-art semi-supervised learning methods. Meanwhile, the proposed DiverseHead architecture is simple and relatively lightweight in terms of parameter space compared to the state-of-the-art methods, whilst reaching high-performance results for all the tested datasets.
Submitted 31 March, 2024; v1 submitted 22 November, 2023; originally announced November 2023.
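DiverseHead's premise, several cheap decision heads on one encoder whose disagreement supplies the perturbation that consistency training needs, can be illustrated in a few lines. The tiny encoder and the average-then-argmax pseudo-labelling rule below are placeholders, not the published architecture.

```python
import torch
import torch.nn as nn

class DiverseHead(nn.Module):
    """Multi-head segmentation model (illustrative sketch of the idea).

    Several lightweight heads share one encoder; disagreement between
    heads provides feature diversity, and their averaged prediction
    gives a consensus pseudo-label for unlabelled imagery.
    """
    def __init__(self, n_heads=4, n_classes=6, ch=16):
        super().__init__()
        self.encoder = nn.Conv2d(3, ch, 3, padding=1)
        self.heads = nn.ModuleList(nn.Conv2d(ch, n_classes, 1) for _ in range(n_heads))

    def forward(self, x):
        f = torch.relu(self.encoder(x))
        return torch.stack([h(f) for h in self.heads])    # (n_heads, B, C, H, W)

logits = DiverseHead()(torch.randn(2, 3, 32, 32))
pseudo = logits.softmax(dim=2).mean(dim=0).argmax(dim=1)  # consensus pseudo-labels
print(pseudo.shape)                                       # (B, H, W)
```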
10. arXiv:2311.05276 [pdf, other] cs.CV
SAMVG: A Multi-stage Image Vectorization Model with the Segment-Anything Model
Authors: Haokun Zhu, Juang Ian Chong, Teng Hu, Ran Yi, Yu-Kun Lai, Paul L. Rosin
Abstract: Vector graphics are widely used in graphical designs and have received more and more attention. However, unlike raster images, which can be easily obtained, acquiring high-quality vector graphics, typically through automatic conversion from raster images, remains a significant challenge, especially for more complex images such as photos or artworks. In this paper, we propose SAMVG, a multi-stage model to vectorize raster images into SVG (Scalable Vector Graphics). First, SAMVG uses general image segmentation provided by the Segment-Anything Model and a novel filtering method to identify the best dense segmentation map for the entire image. Second, SAMVG identifies missing components and adds more detailed components to the SVG. Through a series of extensive experiments, we demonstrate that SAMVG can produce high-quality SVGs in any domain while requiring less computation time and complexity compared to previous state-of-the-art methods.
Submitted 25 December, 2023; v1 submitted 9 November, 2023; originally announced November 2023.
Comments: Accepted by ICASSP 2024
11. arXiv:2305.10344 [pdf, other] cs.CV
Confidence-Guided Semi-supervised Learning in Land Cover Classification
Authors: Wanli Ma, Oktay Karakus, Paul L. Rosin
Abstract: Semi-supervised learning has been well developed to help reduce the cost of manual labelling by exploiting a large quantity of unlabelled data. Especially in the application of land cover classification, pixel-level manual labelling in large-scale imagery is labour-intensive, time-consuming and expensive. However, existing semi-supervised learning methods pay limited attention to the quality of pseudo-labels during training, even though the quality of training data is one of the critical factors determining network performance. In order to fill this gap, we develop a confidence-guided semi-supervised learning (CGSSL) approach that makes use of high-confidence pseudo-labels and reduces the negative effect of low-confidence ones for land cover classification. Meanwhile, the proposed semi-supervised learning approach uses multiple network architectures to increase the diversity of pseudo-labels. The proposed approach significantly improves the performance of land cover classification compared to classic semi-supervised learning methods, and even outperforms fully supervised learning with a complete set of labelled imagery on the benchmark Potsdam land cover dataset.
Submitted 30 May, 2023; v1 submitted 17 May, 2023; originally announced May 2023.
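Confidence-guided use of pseudo-labels is typically implemented by masking out low-confidence pixels so they never enter the loss. A generic sketch follows; the 0.8 threshold is an arbitrary assumption, and CGSSL's multi-architecture diversity is not shown.

```python
import torch
import torch.nn.functional as F

def confidence_guided_targets(logits, thresh=0.8):
    """Confidence-guided pseudo-labelling (generic sketch, not CGSSL itself).

    Keeps a pixel's pseudo-label only when the predicted class probability
    clears a confidence threshold; low-confidence pixels are masked out
    (-1) so they do not contribute to the unsupervised loss.
    """
    probs = logits.softmax(dim=1)                # (B, C, H, W)
    conf, pseudo = probs.max(dim=1)              # per-pixel confidence and label
    pseudo[conf < thresh] = -1                   # ignore unreliable pixels
    return pseudo

targets = confidence_guided_targets(torch.randn(2, 6, 16, 16))
loss = F.cross_entropy(torch.randn(2, 6, 16, 16), targets, ignore_index=-1)
print((targets >= 0).float().mean())             # fraction of pixels kept
```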
12. arXiv:2303.15166 [pdf, other] cs.CV
Towards Artistic Image Aesthetics Assessment: a Large-scale Dataset and a New Method
Authors: Ran Yi, Haoyuan Tian, Zhihao Gu, Yu-Kun Lai, Paul L. Rosin
Abstract: Image aesthetics assessment (IAA) is a challenging task due to its highly subjective nature. Most of the current studies rely on large-scale datasets (e.g., AVA and AADB) to learn a general model for all kinds of photography images. However, little light has been shed on measuring the aesthetic quality of artistic images, and the existing datasets only contain relatively few artworks. Such a defect is a great obstacle to the aesthetic assessment of artistic images. To fill the gap in the field of artistic image aesthetics assessment (AIAA), we first introduce a large-scale AIAA dataset: the Boldbrush Artistic Image Dataset (BAID), which consists of 60,337 artistic images covering various art forms, with more than 360,000 votes from online users. We then propose a new method, SAAN (Style-specific Art Assessment Network), which can effectively extract and utilize style-specific and generic aesthetic information to evaluate artistic images. Experiments demonstrate that our proposed approach outperforms existing IAA methods on the proposed BAID dataset according to quantitative comparisons. We believe the proposed dataset and method can serve as a foundation for future AIAA works and inspire more research in this field. Dataset and code are available at: https://github.com/Dreemurr-T/BAID.git
Submitted 27 March, 2023; originally announced March 2023.
Comments: Accepted by CVPR 2023
Abstract: Universal domain adaptation (UniDA) aims to transfer the knowledge from a labeled source domain to an unlabeled target domain without any assumptions about the label sets, which requires distinguishing the unknown samples from the known ones in the target domain. A main challenge of UniDA is that the nonidentical label sets cause misalignment between the two domains. Moreover, the domain discrepancy and the supervised objectives in the source domain easily lead the whole model to be biased towards the common classes and to produce overconfident predictions for unknown samples. To address these challenges, we propose a new uncertainty-guided UniDA framework. Firstly, we introduce an empirical estimate of the probability that a target sample belongs to the unknown class, which fully exploits the distribution of the target samples in the latent space. Then, based on this estimate, we propose a novel neighbors-searching scheme in a linear subspace with a $\delta$-filter to estimate the uncertainty score of a target sample and discover unknown samples. It fully utilizes the relationship between a target sample and its neighbors in the source domain to avoid the influence of domain misalignment. Secondly, this paper balances the confidences of predictions for both known and unknown samples through an uncertainty-guided margin loss based on the confidences of discovered unknown samples, which can reduce the gap between the intra-class variances of known classes with respect to the unknown class. Finally, experiments on three public datasets demonstrate that our method significantly outperforms existing state-of-the-art methods.
Submitted 30 September, 2024; v1 submitted 19 September, 2022; originally announced September 2022.
Comments: 13 pages. arXiv admin note: text overlap with arXiv:2207.09280
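For a concrete feel for the $\delta$-filter, here is a loose numpy sketch of uncertainty scoring by neighbour search in a linear subspace. The PCA subspace, the radius-based filter, and the scoring rule are all illustrative assumptions; the paper's actual construction differs in its details.

```python
# A loose sketch: project features into a linear subspace, keep only source
# neighbours closer than delta, and score a target sample as more "unknown"
# the fewer and farther its filtered neighbours are. The function name, the
# subspace choice, and the score are assumptions, not the paper's scheme.
import numpy as np
from sklearn.decomposition import PCA

def uncertainty_scores(src_feats, tgt_feats, delta=1.0, dim=32):
    """Higher score = more likely to belong to an unknown class."""
    pca = PCA(n_components=min(dim, src_feats.shape[1])).fit(src_feats)
    zs, zt = pca.transform(src_feats), pca.transform(tgt_feats)
    scores = np.empty(len(zt))
    for i, z in enumerate(zt):
        d = np.linalg.norm(zs - z, axis=1)
        kept = d[d < delta]                       # the delta-filter
        # no nearby source neighbours -> maximal uncertainty
        scores[i] = 1.0 if kept.size == 0 else kept.mean() / delta
    return scores

rng = np.random.default_rng(0)
src = rng.normal(size=(500, 128))
tgt = np.vstack([rng.normal(size=(50, 128)),            # "known"-like
                 rng.normal(loc=5.0, size=(50, 128))])  # "unknown"-like
print(uncertainty_scores(src, tgt, delta=12.0)[:5])
```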
arXiv:2207.09280 [pdf, other] cs.CV
Exploiting Inter-Sample Affinity for Knowability-Aware Universal Domain Adaptation
Authors: Yifan Wang, Lin Zhang, Ran Song, Hongliang Li, Paul L. Rosin, Wei Zhang
Abstract: Universal domain adaptation (UniDA) aims to transfer the knowledge of common classes from the source domain to the target domain without any prior knowledge of the label set, which requires distinguishing the unknown samples from the known ones in the target domain. Recent methods usually focus on categorizing a target sample into one of the source classes rather than distinguishing known and unknown samples, which ignores the inter-sample affinity between known and unknown samples and may lead to suboptimal performance. To address this issue, we propose a novel UniDA framework in which such inter-sample affinity is exploited. Specifically, we introduce a knowability-based labeling scheme which can be divided into two steps: 1) knowability-guided detection of known and unknown samples based on the intrinsic structure of the neighborhoods of samples, where we leverage the first singular vectors of the affinity matrices to obtain the knowability of every target sample; 2) label refinement based on neighborhood consistency, where we refine the label of each target sample based on the neighborhood consistency of its predictions. Auxiliary losses based on the two steps are then used to reduce the inter-sample affinity between the unknown and the known target samples.
Finally, experiments on four public datasets demonstrate that our method significantly outperforms existing state-of-the-art methods.
Submitted 22 August, 2023; v1 submitted 19 July, 2022; originally announced July 2022.
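The first-singular-vector idea admits a small illustration. The sketch below scores a target sample by how much energy the leading singular value of a local affinity matrix captures; the neighbourhood definition and the final score are guesses for illustration only, not the authors' exact construction.

```python
# A minimal numpy sketch of scoring "knowability" from the first singular
# vector/value of a neighbourhood affinity matrix. A coherent (single-
# cluster) neighbourhood concentrates energy in the leading singular value.
# Everything here is an illustrative assumption, not the paper's scheme.
import numpy as np

def knowability(tgt_feat, src_feats, k=10):
    d = np.linalg.norm(src_feats - tgt_feat, axis=1)
    nbrs = src_feats[np.argsort(d)[:k]]          # k nearest source samples
    nbrs = nbrs / np.linalg.norm(nbrs, axis=1, keepdims=True)
    A = nbrs @ nbrs.T                            # k x k cosine affinities
    s = np.linalg.svd(A, compute_uv=False)       # singular values, descending
    return s[0] / s.sum()                        # in (0, 1]; higher = more knowable

rng = np.random.default_rng(1)
src = rng.normal(size=(300, 64)) + 3.0           # one source region
known = src[0] + 0.1 * rng.normal(size=64)
unknown = rng.normal(size=64) - 3.0
print(knowability(known, src), knowability(unknown, src))
```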
arXiv:2202.03678 [pdf, other] cs.CV cs.GR; doi: 10.1109/TPAMI.2022.3147570
Quality Metric Guided Portrait Line Drawing Generation from Unpaired Training Data
Authors: Ran Yi, Yong-Jin Liu, Yu-Kun Lai, Paul L. Rosin
Abstract: Face portrait line drawing is a unique style of art which is highly abstract and expressive. However, due to its high semantic constraints, many existing methods learn to generate portrait drawings using paired training data, which is costly and time-consuming to obtain. In this paper, we propose a novel method to automatically transform face photos to portrait drawings using unpaired training data, with two new features: our method can (1) learn to generate high-quality portrait drawings in multiple styles using a single network, and (2) generate portrait drawings in a "new style" unseen in the training data. To achieve these benefits, we (1) propose a novel quality metric for portrait drawings which is learned from human perception, and (2) introduce a quality loss to guide the network toward generating better-looking portrait drawings. We observe that existing unpaired translation methods, such as CycleGAN, tend to embed invisible reconstruction information indiscriminately across the whole drawing, due to the significant information imbalance between the photo and portrait drawing domains, which leads to important facial features being missed. To address this problem, we propose a novel asymmetric cycle mapping that enforces the reconstruction information to be visible and embedded only in selected facial regions. Along with localized discriminators for important facial regions, our method well preserves all important facial features in the generated drawings. Generator dissection further shows that our model learns to incorporate face semantic information during drawing generation. Extensive experiments, including a user study, show that our model outperforms state-of-the-art methods.
Submitted 8 February, 2022; originally announced February 2022.
Comments: Accepted by IEEE Transactions on Pattern Analysis and Machine Intelligence, https://doi.org/10.1109/TPAMI.2022.3147570, code: https://github.com/yiranran/QMUPD

arXiv:2105.08935 [pdf, other] cs.GR
DeepFaceEditing: Deep Face Generation and Editing with Disentangled Geometry and Appearance Control
Authors: Shu-Yu Chen, Feng-Lin Liu, Yu-Kun Lai, Paul L. Rosin, Chunpeng Li, Hongbo Fu, Lin Gao
Abstract: Recent facial image synthesis methods have been mainly based on conditional generative models. Sketch-based conditions can effectively describe the geometry of faces, including the contours of facial components, hair structures, and salient edges (e.g., wrinkles) on face surfaces, but they lack effective control of appearance, which is influenced by color, material, lighting conditions, etc. To have more control over generated results, one possible approach is to apply existing disentangling methods to separate face images into geometry and appearance representations. However, existing disentangling methods are not optimized for human face editing, and cannot achieve fine control of facial details such as wrinkles. To address this issue, we propose DeepFaceEditing, a structured disentanglement framework specifically designed for face images to support face generation and editing with disentangled control of geometry and appearance. We adopt a local-to-global approach to incorporate face domain knowledge: local component images are decomposed into geometry and appearance representations, which are fused consistently using a global fusion module to improve generation quality. We exploit sketches to assist in extracting a better geometry representation, which also supports intuitive geometry editing via sketching. The resulting method can either extract the geometry and appearance representations from face images, or directly extract the geometry representation from face sketches. Such representations allow users to easily edit and synthesize face images, with decoupled control of their geometry and appearance. Both qualitative and quantitative evaluations show the superior detail and appearance control abilities of our method compared to state-of-the-art methods.
Submitted 17 July, 2021; v1 submitted 19 May, 2021; originally announced May 2021.
arXiv:2009.00633 [pdf, other] cs.CV
NPRportrait 1.0: A Three-Level Benchmark for Non-Photorealistic Rendering of Portraits
Authors: Paul L. Rosin, Yu-Kun Lai, David Mould, Ran Yi, Itamar Berger, Lars Doyle, Seungyong Lee, Chuan Li, Yong-Jin Liu, Amir Semmo, Ariel Shamir, Minjung Son, Holger Winnemoller
Abstract: Despite the recent upsurge of activity in image-based non-photorealistic rendering (NPR), and in particular portrait image stylisation, due to the advent of neural style transfer, the state of performance evaluation in this field is limited, especially compared to the norms in the computer vision and machine learning communities. Unfortunately, the task of evaluating image stylisation is thus far not well defined, since it involves subjective, perceptual and aesthetic aspects. To make progress towards a solution, this paper proposes a new structured, three-level benchmark dataset for the evaluation of stylised portrait images. Rigorous criteria were used for its construction, and its consistency was validated by user studies. Moreover, a new methodology has been developed for evaluating portrait stylisation algorithms, which makes use of the different benchmark levels as well as annotations provided by user studies regarding the characteristics of the faces.
We perform evaluation for a wide variety of image stylisation methods (both portrait-specific and general purpose, and both traditional NPR approaches and neural style transfer) using the new benchmark dataset.
Submitted 1 September, 2020; originally announced September 2020.
Comments: 17 pages, 15 figures

arXiv:2008.05336 [pdf, other] cs.CV
Image-based Portrait Engraving
Authors: Paul L. Rosin, Yu-Kun Lai
Abstract: This paper describes a simple image-based method that applies engraving stylisation to portraits using ordered dithering. Face detection is used to estimate a rough proxy geometry of the head consisting of a cylinder, which is used to warp the dither matrix, causing the engraving lines to curve around the face for better stylisation. Finally, an application of the approach to colour engraving is demonstrated.
Submitted 12 August, 2020; originally announced August 2020.
Comments: 9 pages, 8 figures
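Ordered dithering with a geometry-warped threshold pattern is easy to sketch. In the toy code below, a crude cylindrical bulge shifts the phase of a line-based dither pattern so the engraving lines bow around a face-like band; the bulge profile and all parameters are illustrative assumptions rather than the paper's method.

```python
# A toy engraving-style ordered dither: a per-pixel threshold from a
# periodic line pattern whose phase is shifted by a cylinder-like height
# profile, so the lines curve inside the "face" band. Band centre, radius,
# depth, and period are illustrative assumptions.
import numpy as np

def engrave(gray, period=6, cx=128, radius=80, depth=10):
    """gray: 2-D float array in [0, 1]; returns a binary line engraving."""
    h, w = gray.shape
    ys, xs = np.mgrid[0:h, 0:w].astype(float)
    # cylinder-like profile: zero outside the band, bulging inside, so the
    # otherwise horizontal engraving lines curve around the face
    t = np.clip((xs - cx) / radius, -1.0, 1.0)
    bulge = depth * np.sqrt(1.0 - t * t)
    phase = ((ys + bulge) % period) / period     # ordered line pattern
    return (gray > phase).astype(np.uint8)       # threshold per pixel

img = np.tile(np.linspace(0, 1, 256), (256, 1))  # toy gradient "portrait"
out = engrave(img)
print(out.shape, out.mean())
```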
arXiv:2003.08763 [pdf] cs.CV cs.IR cs.LG stat.ML
Shape retrieval of non-rigid 3D human models
Authors: David Pickup, Xianfang Sun, Paul L. Rosin, Ralph R. Martin, Z. Cheng, Zhouhui Lian, Masaki Aono, A. Ben Hamza, A. Bronstein, M. Bronstein, S. Bu, Umberto Castellani, S. Cheng, Valeria Garro, Andrea Giachetti, Afzal Godil, Luca Isaia, J. Han, Henry Johan, L. Lai, Bo Li, C. Li, Haisheng Li, Roee Litman, X. Liu, et al. (6 additional authors not shown)
Abstract: 3D models of humans are commonly used within computer graphics and vision, and so the ability to distinguish between body shapes is an important shape retrieval problem. We extend our recent paper which provided a benchmark for testing non-rigid 3D shape retrieval algorithms on 3D human models. This benchmark provided a far stricter challenge than previous shape benchmarks.
We have added 145 new models for use as a separate training set, in order to standardise the training data used and provide a fairer comparison. We have also included experiments with the FAUST dataset of human scans. All participants of the previous benchmark study have taken part in the new tests reported here, many providing updated results using the new data. In addition, further participants have also taken part, and we provide extra analysis of the retrieval results. In total, 25 different shape retrieval methods are evaluated.
Submitted 1 March, 2020; originally announced March 2020.
Comments: International Journal of Computer Vision, 2016

arXiv:1910.14063 [pdf, other] cs.GR cs.CV
LaplacianNet: Learning on 3D Meshes with Laplacian Encoding and Pooling
Authors: Yi-Ling Qiao, Lin Gao, Jie Yang, Paul L. Rosin, Yu-Kun Lai, Xilin Chen
Abstract: 3D models are commonly used in computer vision and graphics. With the wider availability of mesh data, an efficient and intrinsic deep learning approach to processing 3D meshes is in great need.
Unlike images, 3D meshes have irregular connectivity, requiring careful design to capture relations in the data. To utilize the topology information while staying robust under different triangulations, we propose to encode mesh connectivity using Laplacian spectral analysis, along with Mesh Pooling Blocks (MPBs) that can split the surface domain into local pooling patches and aggregate global information among them. We build a mesh hierarchy from fine to coarse using Laplacian spectral clustering, which is flexible under isometric transformation. Inside the MPBs there are pooling layers to collect local information and multi-layer perceptrons to compute vertex features of increasing complexity. To obtain the relationships among different clusters, we introduce a Correlation Net to compute a correlation matrix, which can aggregate the features globally by matrix multiplication with cluster features. Our network architecture is flexible enough to be used on meshes with different numbers of vertices. We conduct several experiments including shape segmentation and classification, and our LaplacianNet outperforms state-of-the-art algorithms for these tasks on the ShapeNet and COSEG datasets.
Submitted 30 October, 2019; originally announced October 2019.
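The spectral pooling step can be illustrated in a few lines: cluster vertices on the low-frequency eigenvectors of the mesh graph Laplacian to obtain pooling patches that are stable under isometric deformation. The toy sketch below uses a grid graph in place of a real mesh; the patch and eigenvector counts are arbitrary choices.

```python
# A small sketch of Laplacian spectral clustering for mesh pooling patches.
# The "mesh" here is just a toy grid graph; a real mesh would supply its
# vertex adjacency instead. Cluster/eigenvector counts are assumptions.
import numpy as np
from scipy.sparse import lil_matrix
from scipy.sparse.csgraph import laplacian
from sklearn.cluster import KMeans

def pooling_patches(adj, n_patches=8, n_eigs=16):
    L = laplacian(adj.tocsr(), normed=True)
    # smallest eigenpairs of the Laplacian = smoothest signals on the graph;
    # dense eigendecomposition is fine at toy scale
    vals, vecs = np.linalg.eigh(L.toarray())
    return KMeans(n_clusters=n_patches, n_init=10).fit_predict(vecs[:, :n_eigs])

n = 16                                   # 16x16 grid standing in for a mesh
adj = lil_matrix((n * n, n * n))
for i in range(n):
    for j in range(n):
        v = i * n + j
        if j + 1 < n: adj[v, v + 1] = adj[v + 1, v] = 1.0
        if i + 1 < n: adj[v, v + n] = adj[v + n, v] = 1.0
labels = pooling_patches(adj)
print(np.bincount(labels))               # vertices per pooling patch
```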
arXiv:1908.08433 [pdf, other] cs.CV cs.DB
Scoot: A Perceptual Metric for Facial Sketches
Authors: Deng-Ping Fan, ShengChuan Zhang, Yu-Huan Wu, Yun Liu, Ming-Ming Cheng, Bo Ren, Paul L. Rosin, Rongrong Ji
Abstract: The human visual system has a strong ability to quickly assess the perceptual similarity between two facial sketches. However, the two widely used facial sketch metrics, FSIM and SSIM, fail to capture this perceptual similarity. Recent work in the facial modeling area has verified that the inclusion of both structure and texture has a significant positive benefit for face sketch synthesis (FSS). But which statistics are more important, and which are responsible for this success? In this paper, we design a perceptual metric, called Structure Co-Occurrence Texture (Scoot), which simultaneously considers block-level spatial structure and co-occurrence texture statistics. To test the quality of metrics, we propose three novel meta-measures based on various reliable properties. Extensive experiments demonstrate that our Scoot metric exceeds the performance of prior work. Besides, we built the first large-scale (152k judgments) human-perception-based sketch database that can evaluate how consistent a metric is with human perception. Our results suggest that "spatial structure" and "co-occurrence texture" are two generally applicable perceptual features in face sketch synthesis.
Submitted 4 September, 2019; v1 submitted 21 August, 2019; originally announced August 2019.
Comments: Code & dataset: http://mmcheng.net/scoot/, 11 pages, ICCV 2019. The first good evaluation metric for facial sketches that is consistent with human judgment. arXiv admin note: text overlap with arXiv:1804.02975
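A minimal sketch of the co-occurrence side of such a metric, assuming per-block GLCM statistics compared by Euclidean distance; the block size, the chosen properties, and the final similarity are simplifications, so consult the released code above for the real Scoot definition.

```python
# A rough Scoot-flavoured comparison: describe each sketch by block-level
# grey-level co-occurrence (GLCM) statistics and compare the descriptors.
# Block size, properties, and the distance-to-similarity map are assumptions.
import numpy as np
from skimage.feature import graycomatrix, graycoprops

def cooc_descriptor(img, block=32, levels=16):
    """img: uint8 grayscale sketch; returns stacked per-block GLCM stats."""
    q = (img.astype(np.uint16) * levels // 256).astype(np.uint8)  # quantise
    feats = []
    for y in range(0, q.shape[0] - block + 1, block):
        for x in range(0, q.shape[1] - block + 1, block):
            P = graycomatrix(q[y:y+block, x:x+block], distances=[1],
                             angles=[0, np.pi/2], levels=levels, normed=True)
            feats += [graycoprops(P, "energy").mean(),
                      graycoprops(P, "homogeneity").mean()]
    return np.array(feats)

def scoot_like_similarity(a, b):
    da, db = cooc_descriptor(a), cooc_descriptor(b)
    return 1.0 / (1.0 + np.linalg.norm(da - db))   # 1 = identical texture

rng = np.random.default_rng(3)
s1 = rng.integers(0, 256, (128, 128), dtype=np.uint8)
s2 = np.clip(s1.astype(int) + rng.integers(-5, 6, s1.shape), 0, 255).astype(np.uint8)
print(scoot_like_similarity(s1, s2))
```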
arXiv:1901.07689 [pdf, other] cs.CV; doi: 10.1109/TIP.2019.2903294
Simultaneous Subspace Clustering and Cluster Number Estimating based on Triplet Relationship
Authors: Jie Liang, Jufeng Yang, Ming-Ming Cheng, Paul L. Rosin, Liang Wang
Abstract: In this paper we propose a unified framework to simultaneously discover the number of clusters and group the data points into them using subspace clustering.
Real data distributed in a high-dimensional space can be disentangled into a union of low-dimensional subspaces, which can benefit various applications. To explore such intrinsic structure, state-of-the-art subspace clustering approaches often optimize a self-representation problem among all samples, to construct a pairwise affinity graph for spectral clustering. However, a graph with pairwise similarities lacks robustness for segmentation, especially for samples which lie on the intersection of two subspaces. To address this problem, we design a hyper-correlation-based data structure termed the "triplet relationship", which reveals high relevance and local compactness among three samples. The triplet relationship can be derived from the self-representation matrix, and be utilized to iteratively assign the data points to clusters. The three samples in each triplet are encouraged to be highly correlated and are considered as a meta-element during clustering, which shows more robustness than pairwise relationships when segmenting two densely distributed subspaces. Based on the triplet relationship, we propose a unified optimizing scheme to automatically calculate clustering assignments. Specifically, we optimize a model selection reward and a fusion reward by simultaneously maximizing the similarity of triplets from different clusters while minimizing the correlation of triplets from the same cluster. The proposed algorithm also automatically reveals the number of clusters and fuses groups to avoid over-segmentation. Extensive experimental results on both synthetic and real-world datasets validate the effectiveness and robustness of the proposed method.
Submitted 22 January, 2019; originally announced January 2019.
Comments: 13 pages, 4 figures, 6 tables
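How triplets might fall out of a self-representation matrix can be sketched directly. The rule below (a sample plus its two strongest peers, kept only when all three pairwise affinities are high) is a simplified stand-in for the paper's construction, not its exact definition.

```python
# An illustrative extraction of "triplet relationships" from a
# self-representation matrix C (each sample expressed as a combination of
# the others, as in subspace clustering). The keep-rule and threshold tau
# are simplifying assumptions.
import numpy as np

def triplets_from_C(C, tau=0.05):
    A = (np.abs(C) + np.abs(C.T)) / 2          # symmetric affinity
    np.fill_diagonal(A, 0.0)
    out = []
    for i in range(len(A)):
        j, k = np.argsort(A[i])[-2:]           # two strongest peers of i
        if min(A[i, j], A[i, k], A[j, k]) > tau:
            out.append(tuple(sorted((i, j, k))))
    return sorted(set(out))                    # dedupe shared triplets

# toy C: two 4-sample blocks, as if drawn from two subspaces
C = np.zeros((8, 8))
C[:4, :4] = 0.3
C[4:, 4:] = 0.3
np.fill_diagonal(C, 0.0)
C += 0.01 * np.random.default_rng(4).random((8, 8))
print(triplets_from_C(C))
```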
arXiv:1804.02975 [pdf, other] cs.CV
Face Sketch Synthesis Style Similarity: A New Structure Co-occurrence Texture Measure
Authors: Deng-Ping Fan, ShengChuan Zhang, Yu-Huan Wu, Ming-Ming Cheng, Bo Ren, Rongrong Ji, Paul L. Rosin
Abstract: Existing face sketch synthesis (FSS) similarity measures are sensitive to slight image degradation (e.g., noise, blur). However, human perception of the similarity of two sketches considers both structure and texture as essential factors, and is not sensitive to slight ("pixel-level") mismatches. Consequently, the use of existing similarity measures can lead to better algorithms receiving a lower score than worse algorithms. This unreliable evaluation has significantly hindered the development of the FSS field. To solve this problem, we propose a novel and robust style similarity measure called the Scoot-measure (Structure CO-Occurrence Texture Measure), which simultaneously evaluates "block-level" spatial structure and co-occurrence texture statistics. In addition, we propose 4 new meta-measures and create 2 new datasets to perform a comprehensive evaluation of several widely used FSS measures on two large databases. Experimental results demonstrate that our measure not only provides a reliable evaluation but also achieves significantly improved performance.
Specifically, the study indicated a higher degree of correlation between our measure and human judgment (78.8%) than the best prior measure (58.6%). Our code will be made available.
Submitted 9 April, 2018; originally announced April 2018.
Comments: 9 pages, 15 figures, conference

arXiv:1803.10683 [pdf, other] cs.CV
Pose2Seg: Detection Free Human Instance Segmentation
Authors: Song-Hai Zhang, Ruilong Li, Xin Dong, Paul L. Rosin, Zixi Cai, Xi Han, Dingcheng Yang, Hao-Zhi Huang, Shi-Min Hu
Abstract: The standard approach to image instance segmentation is to perform object detection first, and then segment the object from the detection bounding-box. More recently, deep learning methods like Mask R-CNN perform them jointly. However, little research takes into account the uniqueness of the "human" category, which can be well defined by the pose skeleton. Moreover, the human pose skeleton can be used to better distinguish instances with heavy occlusion than bounding-boxes.
In this paper, we present a brand-new pose-based instance segmentation framework for humans which separates instances based on human pose rather than proposal region detection. We demonstrate that our pose-based framework can achieve better accuracy than the state-of-the-art detection-based approach on the human instance segmentation problem, and can moreover better handle occlusion. Furthermore, there are few public datasets containing many heavily occluded humans along with comprehensive annotations, which makes this a challenging problem seldom noticed by researchers. Therefore, in this paper we introduce a new benchmark, "Occluded Human (OCHuman)", which focuses on occluded humans with comprehensive annotations including bounding-boxes, human poses and instance masks. This dataset contains 8110 detailed annotated human instances within 4731 images. With an average of 0.67 MaxIoU for each person, OCHuman is the most complex and challenging dataset related to human instance segmentation. Through this dataset, we want to emphasize occlusion as a challenging problem for researchers to study.
Submitted 8 April, 2019; v1 submitted 28 March, 2018; originally announced March 2018.
Comments: 8 pages
Journal ref: CVPR 2019
arXiv:1708.09641 [pdf, other] cs.CV
Automatic Semantic Style Transfer using Deep Convolutional Neural Networks and Soft Masks
Authors: Huihuang Zhao, Paul L. Rosin, Yu-Kun Lai
Abstract: This paper presents an automatic image synthesis method to transfer the style of an example image to a content image. When standard neural style transfer approaches are used, the textures and colours in different semantic regions of the style image are often applied inappropriately to the content image, ignoring its semantic layout and ruining the transfer result. In order to reduce or avoid such effects, we propose a novel method based on automatically segmenting the objects and extracting their soft semantic masks from the style and content images, in order to preserve the structure of the content image while having the style transferred. Each soft mask of the style image represents a specific part of the style image, corresponding to the soft mask of the content image with the same semantics. Both the soft masks and the source images are provided as multichannel input to an augmented deep CNN framework for style transfer, which incorporates a generative Markov random field (MRF) model. Results on various images show that our method outperforms the most recent techniques.
Submitted 31 August, 2017; originally announced August 2017.
Comments: 12 pages
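The input packing described above (image plus soft masks as one multichannel tensor) is simple to make concrete; the shapes and the number of masks in this sketch are assumptions.

```python
# A tiny sketch of stacking an RGB image with its K soft semantic masks
# into one multichannel array before feeding a style-transfer network.
# K = 4 and the (H, W, C) layout are illustrative assumptions.
import numpy as np

def pack_with_masks(rgb, masks):
    """rgb: (H, W, 3) floats in [0, 1]; masks: (K, H, W) soft masks in [0, 1]."""
    masks = np.transpose(masks, (1, 2, 0))        # (H, W, K)
    return np.concatenate([rgb, masks], axis=-1)  # (H, W, 3 + K)

rgb = np.random.default_rng(5).random((64, 64, 3))
masks = np.random.default_rng(6).random((4, 64, 64))  # e.g. face/hair/bg/other
print(pack_with_masks(rgb, masks).shape)              # (64, 64, 7)
```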
arXiv:1612.01810 [pdf, other] cs.CV
FLIC: Fast Linear Iterative Clustering with Active Search
Authors: Jiaxing Zhao, Ren Bo, Qibin Hou, Ming-Ming Cheng, Paul L. Rosin
Abstract: Benefiting from its high efficiency and simplicity, Simple Linear Iterative Clustering (SLIC) remains one of the most popular over-segmentation tools. However, due to its explicit enforcement of spatial similarity for region continuity, the boundary adaptation of SLIC is sub-optimal. It also converges slowly, as a result of both the fixed search region and performing the assignment step and the update step separately. In this paper, we propose an alternative approach that fixes these inherent limitations of SLIC. In our approach, each pixel actively searches for its corresponding segment with the help of its neighboring pixels, which naturally enables region coherence without harming boundary adaptation. We also perform the assignment and update steps jointly, allowing a high convergence rate. Extensive evaluations on the Berkeley segmentation benchmark verify that our method outperforms competitive methods under various evaluation metrics. It also has the lowest time cost among existing methods (approximately 30 fps for a 481x321 image on a single CPU core).
Submitted 5 October, 2018; v1 submitted 6 December, 2016; originally announced December 2016.
Comments: AAAI 2018
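For contrast with the fixed-search-region baseline it improves on, here is a compact SLIC-style superpixel loop: k-means over intensity plus spatially weighted coordinates. FLIC's active neighbour-driven search and fused assignment/update steps are not reproduced here; this only shows the shared objective, with brute-force distances standing in for SLIC's 2S x 2S search windows.

```python
# A compact SLIC-style baseline: k-means over (intensity, x, y) features
# with the spatial terms scaled by m/S, seeded on a regular grid. Real SLIC
# restricts each pixel's search to a 2S x 2S window; FLIC further lets
# pixels search labels through neighbours and fuses the two steps.
import numpy as np

def slic_like(img, n_seg=16, m=0.2, iters=5):
    h, w = img.shape
    ys, xs = np.mgrid[0:h, 0:w].astype(float)
    S = np.sqrt(h * w / n_seg)                    # expected superpixel spacing
    X = np.stack([img.ravel(),
                  (m / S) * xs.ravel(),
                  (m / S) * ys.ravel()], axis=1)
    gy, gx = np.mgrid[S/2:h:S, S/2:w:S]           # regular seed grid
    centres = X[gy.ravel().astype(int) * w + gx.ravel().astype(int)]
    for _ in range(iters):
        d = ((X[:, None, :] - centres[None]) ** 2).sum(-1)  # brute force
        labels = d.argmin(1)                      # assignment step
        for k in range(len(centres)):             # update step
            pts = X[labels == k]
            if len(pts):
                centres[k] = pts.mean(0)
    return labels.reshape(h, w)

img = np.random.default_rng(7).random((48, 48))
print(np.unique(slic_like(img)).size)             # number of segments used
```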
arXiv:1605.05106 [pdf, other] cs.CV
doi: 10.1007/s00138-017-0830-x
Detecting Violent and Abnormal Crowd activity using Temporal Analysis of Grey Level Co-occurrence Matrix (GLCM) Based Texture Measures
Authors: Kaelon Lloyd, David Marshall, Simon C. Moore, Paul L. Rosin
Abstract: The severity of injury sustained in assault-related violence can be minimised by reducing detection time. However, human operators have been shown to perform poorly at detecting events in video footage when presented with simultaneous feeds. We use computer vision techniques to develop an automated method of abnormal crowd detection that can aid a human operator in detecting violent behaviour. We observed that violent behaviour in city-centre environments often occurs in crowded areas, where individual actions are occluded by other crowd members. We propose a real-time descriptor that models crowd dynamics by encoding changes in crowd texture using temporal summaries of Grey Level Co-occurrence Matrix (GLCM) features. We introduce a measure of inter-frame uniformity (IFU) and demonstrate that the appearance of violent behaviour changes in a less uniform manner than other types of crowd behaviour. Our proposed method is computationally cheap and offers real-time description.
Evaluating our method on a privately held CCTV dataset and the publicly available Violent Flows, UCF Web Abnormality, and UMN Abnormal Crowd datasets, we report receiver operating characteristic scores of 0.9782, 0.9403, 0.8218, and 0.9956 respectively.
Submitted 3 April, 2017; v1 submitted 17 May, 2016; originally announced May 2016.
Comments: Published under open access, 9 pages, 12 figures
ACM Class: I.2.10; I.4.7; I.4.8
Journal ref: Machine Vision and Applications (2017)
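To make the texture encoding concrete, here is a minimal sketch of per-frame GLCM features using scikit-image (function names follow scikit-image >= 0.19; older releases spell them greycomatrix/greycoprops). The paper's temporal summaries and IFU measure are not reproduced here; the quantisation level and the chosen properties are illustrative assumptions.

# Per-frame GLCM texture features for grayscale uint8 frames (Python).
import numpy as np
from skimage.feature import graycomatrix, graycoprops

def frame_texture(frame, levels=32):
    """Return mean GLCM contrast/homogeneity/energy for one frame."""
    # Quantise to fewer grey levels so the co-occurrence matrix stays small.
    q = (frame.astype(np.uint16) * levels // 256).astype(np.uint8)
    glcm = graycomatrix(q, distances=[1], angles=[0, np.pi / 2],
                        levels=levels, symmetric=True, normed=True)
    return np.array([graycoprops(glcm, p).mean()
                     for p in ('contrast', 'homogeneity', 'energy')])

# Crowd dynamics would then be encoded from how such features change over
# time, e.g. a simple frame-to-frame difference on placeholder frames:
f0 = np.random.randint(0, 256, (120, 160), dtype=np.uint8)
f1 = np.random.randint(0, 256, (120, 160), dtype=np.uint8)
print(np.abs(frame_texture(f1) - frame_texture(f0)))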