Search | arXiv e-print repository
Showing 1–49 of 49 results for author: Panagakis, Y

Searching in archive cs. Results sorted by announcement date (newest first), 50 per page.
1. arXiv:2408.16845 [pdf, other] cs.CV cs.LG
Enabling Local Editing in Diffusion Models by Joint and Individual Component Analysis
Authors: Theodoros Kouzelis, Manos Plitsis, Mihalis A. Nicolaou, Yannis Panagakis
Abstract: Recent advances in Diffusion Models (DMs) have led to significant progress in visual synthesis and editing tasks, establishing them as a strong competitor to Generative Adversarial Networks (GANs). However, the latent space of DMs is not as well understood as that of GANs. Recent research has focused on unsupervised semantic discovery in the latent space of DMs by leveraging the bottleneck layer of the denoising network, which has been shown to exhibit properties of a semantic latent space. However, these approaches are limited to discovering global attributes. In this paper, we address the challenge of local image manipulation in DMs and introduce an unsupervised method to factorize the latent semantics learned by the denoising network of pre-trained DMs. Given an arbitrary image and defined regions of interest, we utilize the Jacobian of the denoising network to establish a relation between the regions of interest and their corresponding subspaces in the latent space. Furthermore, we disentangle the joint and individual components of these subspaces to identify latent directions that enable local image manipulation. Once discovered, these directions can be applied to different images to produce semantically consistent edits, making our method suitable for practical applications. Experimental results on various datasets demonstrate that our method can produce semantic edits that are more localized and have better fidelity compared to the state of the art.
Submitted 2 September, 2024; v1 submitted 29 August, 2024; originally announced August 2024.
Comments: Accepted at BMVC2024
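The core mechanism in the abstract above (relating regions of interest to latent subspaces through the Jacobian of the denoising network) can be sketched on a toy model. Everything below is an illustrative assumption: a small MLP stands in for the denoiser, and principal angles between region subspaces stand in for the joint/individual decomposition; this is not the paper's algorithm.

```python
# Illustrative sketch only: how a Jacobian of a denoiser can link image regions
# to latent subspaces, in the spirit of the abstract above. The toy "denoiser",
# its dimensions, and the joint/individual split via principal angles are
# assumptions for illustration, not the paper's actual method.
import torch

torch.manual_seed(0)
d_latent, n_pixels = 16, 64
denoiser = torch.nn.Sequential(            # stand-in for the bottleneck-to-image map
    torch.nn.Linear(d_latent, 128), torch.nn.Tanh(), torch.nn.Linear(128, n_pixels)
)

h = torch.randn(d_latent)                  # latent code of some image
J = torch.autograd.functional.jacobian(denoiser, h)   # shape (n_pixels, d_latent)

def region_subspace(mask, k=4):
    """Top-k right singular vectors of the Jacobian rows inside a region."""
    _, _, Vh = torch.linalg.svd(J[mask], full_matrices=False)
    return Vh[:k].T                        # (d_latent, k) orthonormal basis

mask_a = torch.zeros(n_pixels, dtype=torch.bool); mask_a[:32] = True
mask_b = ~mask_a
Va, Vb = region_subspace(mask_a), region_subspace(mask_b)

# Cosines of the principal angles between the two subspaces: values near 1
# indicate "joint" directions (affect both regions); near 0, "individual" ones.
cos_angles = torch.linalg.svdvals(Va.T @ Vb)
print(cos_angles)
```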
2. arXiv:2406.10685 [pdf, other] cs.LG
Scale Equivariant Graph Metanetworks
Authors: Ioannis Kalogeropoulos, Giorgos Bouritsas, Yannis Panagakis
Abstract: This paper pertains to an emerging machine learning paradigm: learning higher-order functions, i.e. functions whose inputs are functions themselves, particularly when these inputs are Neural Networks (NNs). With the growing interest in architectures that process NNs, a recurring design principle has permeated the field: adhering to the permutation symmetries arising from the connectionist structure of NNs. However, are these the sole symmetries present in NN parameterizations? Zooming into most practical activation functions (e.g. sine, ReLU, tanh) answers this question negatively and gives rise to intriguing new symmetries, which we collectively refer to as scaling symmetries, that is, non-zero scalar multiplications and divisions of weights and biases. In this work, we propose Scale Equivariant Graph MetaNetworks (ScaleGMNs), a framework that adapts the Graph Metanetwork (message-passing) paradigm by incorporating scaling symmetries and thus rendering neuron and edge representations equivariant to valid scalings. We introduce novel building blocks, of independent technical interest, that allow for equivariance or invariance with respect to individual scalar multipliers or their product, and use them in all components of ScaleGMN. Furthermore, we prove that, under certain expressivity conditions, ScaleGMN can simulate the forward and backward pass of any input feedforward neural network. Experimental results demonstrate that our method advances the state-of-the-art performance for several datasets and activation functions, highlighting the power of scaling symmetries as an inductive bias for NN processing. The source code is publicly available at https://github.com/jkalogero/scalegmn.
Submitted 30 October, 2024; v1 submitted 15 June, 2024; originally announced June 2024.
Comments: Accepted at NeurIPS 2024. 34 pages
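For intuition about the scaling symmetries this abstract refers to, here is a minimal self-contained check (my own toy example, unrelated to the linked scalegmn repository): in a ReLU network, scaling a hidden neuron's incoming weights and bias by a positive factor and dividing its outgoing weights by the same factor leaves the function unchanged, since ReLU(a*x) = a*ReLU(x) for a > 0.

```python
# Minimal sketch of a "scaling symmetry" for a 2-layer ReLU network.
import numpy as np

rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(8, 4)), rng.normal(size=8)
W2, b2 = rng.normal(size=(3, 8)), rng.normal(size=3)

def net(x, W1, b1, W2, b2):
    return W2 @ np.maximum(W1 @ x + b1, 0.0) + b2

a = rng.uniform(0.1, 10.0, size=8)   # one positive scale per hidden neuron
W1s, b1s = a[:, None] * W1, a * b1   # rescale incoming weights and bias
W2s = W2 / a[None, :]                # compensate on the outgoing weights

x = rng.normal(size=4)
print(np.allclose(net(x, W1, b1, W2, b2), net(x, W1s, b1s, W2s, b2)))  # True
```

For ReLU the valid scalings are positive scalars; other activations admit different sets of valid scalings, which is the kind of structure the abstract's framework is built to respect.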
3. arXiv:2405.18045 [pdf, other] cs.LG cs.CV
Bridging Mini-Batch and Asymptotic Analysis in Contrastive Learning: From InfoNCE to Kernel-Based Losses
Authors: Panagiotis Koromilas, Giorgos Bouritsas, Theodoros Giannakopoulos, Mihalis Nicolaou, Yannis Panagakis
Abstract: What do different contrastive learning (CL) losses actually optimize for? Although multiple CL methods have demonstrated remarkable representation learning capabilities, the differences in their inner workings remain largely opaque. In this work, we analyse several CL families and prove that, under certain conditions, they admit the same minimisers when optimizing either their batch-level objectives or their expectations asymptotically. In both cases, an intimate connection with the hyperspherical energy minimisation (HEM) problem resurfaces. Drawing inspiration from this, we introduce a novel CL objective, coined Decoupled Hyperspherical Energy Loss (DHEL). DHEL simplifies the problem by decoupling the target hyperspherical energy from the alignment of positive examples while preserving the same theoretical guarantees. Going one step further, we show the same results hold for another relevant CL family, namely kernel contrastive learning (KCL), with the additional advantage of the expected loss being independent of batch size, thus identifying the minimisers in the non-asymptotic regime. Empirical results demonstrate improved downstream performance and robustness across combinations of different batch sizes and hyperparameters, and reduced dimensionality collapse, on several computer vision datasets.
Submitted 28 May, 2024; originally announced May 2024.
Comments: Accepted at ICML 2024. Code available at: https://github.com/pakoromilas/DHEL-KCL.git
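As background for the hyperspherical-energy view mentioned in the abstract, the sketch below computes the classic alignment and uniformity terms on the unit hypersphere. This is the standard alignment/uniformity decomposition, not the paper's DHEL objective; the batch size, dimensionality, and temperature t are arbitrary assumptions.

```python
# Hedged sketch: alignment (pull positives together) plus a log-sum-exp
# uniformity/energy term (spread all embeddings over the hypersphere).
import torch
import torch.nn.functional as F

def alignment_uniformity(z1, z2, t=2.0):
    """z1, z2: (N, d) embeddings of two augmented views of the same N samples."""
    z1, z2 = F.normalize(z1, dim=1), F.normalize(z2, dim=1)
    align = (z1 - z2).pow(2).sum(dim=1).mean()        # positive-pair alignment
    z = torch.cat([z1, z2], dim=0)
    sq_dists = torch.cdist(z, z).pow(2)               # all pairwise squared distances
    off_diag = ~torch.eye(len(z), dtype=torch.bool)
    uniform = torch.exp(-t * sq_dists[off_diag]).mean().log()  # hyperspherical energy
    return align, uniform

z1, z2 = torch.randn(128, 32), torch.randn(128, 32)
a, u = alignment_uniformity(z1, z2)
print(f"alignment={a.item():.3f}, uniformity={u.item():.3f}")
```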
4. arXiv:2403.20287 [pdf, other] cs.CV cs.LG
Benchmarking Counterfactual Image Generation
Authors: Thomas Melistas, Nikos Spyrou, Nefeli Gkouti, Pedro Sanchez, Athanasios Vlontzos, Yannis Panagakis, Giorgos Papanastasiou, Sotirios A. Tsaftaris
Abstract: Generative AI has revolutionised visual content editing, empowering users to effortlessly modify images and videos. However, not all edits are equal. To perform realistic edits in domains such as natural image or medical imaging, modifications must respect causal relationships inherent to the data generation process. Such image editing falls into the counterfactual image generation regime. Evaluating counterfactual image generation is substantially complex: it not only lacks observable ground truths, but also requires adherence to causal constraints. Although several counterfactual image generation methods and evaluation metrics exist, a comprehensive comparison within a unified setting is lacking. We present a comparison framework to thoroughly benchmark counterfactual image generation methods. We integrate all models that have been used for the task at hand and expand them to novel datasets and causal graphs, demonstrating the superiority of Hierarchical VAEs across most datasets and metrics. Our framework is implemented in a user-friendly Python package that can be extended to incorporate additional SCMs, causal methods, generative models, and datasets for the community to build on. Code: https://github.com/gulnazaki/counterfactual-benchmark
Submitted 29 October, 2024; v1 submitted 29 March, 2024; originally announced March 2024.
5. arXiv:2402.12550 [pdf, other] cs.CV cs.LG
Multilinear Mixture of Experts: Scalable Expert Specialization through Factorization
Authors: James Oldfield, Markos Georgopoulos, Grigorios G. Chrysos, Christos Tzelepis, Yannis Panagakis, Mihalis A. Nicolaou, Jiankang Deng, Ioannis Patras
Abstract: The Mixture of Experts (MoE) paradigm provides a powerful way to decompose dense layers into smaller, modular computations often more amenable to human interpretation, debugging, and editability. However, a major challenge lies in the computational cost of scaling the number of experts high enough to achieve fine-grained specialization. In this paper, we propose the Multilinear Mixture of Experts ($\mu$MoE) layer to address this, focusing on vision models. $\mu$MoE layers enable scalable expert specialization by performing an implicit computation on prohibitively large weight tensors entirely in factorized form. Consequently, $\mu$MoEs (1) avoid the restrictively high inference-time costs of dense MoEs, yet (2) do not inherit the training issues of the popular sparse MoEs' discrete (non-differentiable) expert routing. We present both qualitative and quantitative evidence that scaling $\mu$MoE layers when fine-tuning foundation models for vision tasks leads to more specialized experts at the class-level, further enabling manual bias correction in CelebA attribute classification. Finally, we show qualitative results demonstrating the expert specialism achieved when pre-training large GPT2 and MLP-Mixer models with parameter-matched $\mu$MoE blocks at every layer, maintaining comparable accuracy. Our code is available at: https://github.com/james-oldfield/muMoE
Submitted 16 October, 2024; v1 submitted 19 February, 2024; originally announced February 2024.
Comments: Accepted at NeurIPS 2024. Github: https://github.com/james-oldfield/muMoE. Project page: https://james-oldfield.github.io/muMoE
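The phrase "implicit computation on prohibitively large weight tensors entirely in factorized form" can be illustrated with a CP-style factorization: hold the (experts x out x in) tensor as three factor matrices and compute the gated forward pass with an einsum, never materializing the full tensor. A hedged sketch, assuming a rank, sizes, and a softmax gate of my choosing rather than the paper's exact $\mu$MoE layer:

```python
# Sketch of a factorized mixture-of-experts forward pass. The implicit weight
# tensor is W[e] = B @ diag(A[e]) @ C.T, but W is never formed explicitly.
import torch

E, d_in, d_out, R = 512, 64, 32, 16          # many experts, modest CP rank
A = torch.randn(E, R) / R**0.5               # expert factor
B = torch.randn(d_out, R) / R**0.5           # output factor
C = torch.randn(d_in, R) / R**0.5            # input factor

def moe_forward(x, gate_logits):
    """x: (batch, d_in); gate_logits: (batch, E)."""
    g = torch.softmax(gate_logits, dim=-1)   # soft (differentiable) expert routing
    # y_b = sum_e g_be * W[e] x_b = sum_r B[:, r] * (g A)_br * (x C)_br
    return torch.einsum('or,br,br->bo', B, g @ A, x @ C)

x = torch.randn(8, d_in)
y = moe_forward(x, torch.randn(8, E))
print(y.shape)   # torch.Size([8, 32])
```

Materializing the dense expert tensor here would take E * d_out * d_in entries (over a million floats); the factorized pass only ever touches the three small factor matrices.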
6. arXiv:2311.17968 [pdf, other] eess.SP cs.AI cs.HC cs.LG
Latent Alignment with Deep Set EEG Decoders
Authors: Stylianos Bakas, Siegfried Ludwig, Dimitrios A. Adamos, Nikolaos Laskaris, Yannis Panagakis, Stefanos Zafeiriou
Abstract: The variability in EEG signals between different individuals poses a significant challenge when implementing brain-computer interfaces (BCI). Commonly proposed solutions to this problem include deep learning models, due to their increased capacity and generalization, as well as explicit domain adaptation techniques. Here, we introduce the Latent Alignment method that won the Benchmarks for EEG Transfer Learning (BEETL) competition and present its formulation as a deep set applied on the set of trials from a given subject. Its performance is compared to recent statistical domain adaptation techniques under various conditions. The experimental paradigms include motor imagery (MI), oddball event-related potentials (ERP) and sleep stage classification, where different well-established deep learning models are applied on each task. Our experimental results show that performing statistical distribution alignment at later stages in a deep learning model is beneficial to the classification accuracy, yielding the highest performance for our proposed method. We further investigate practical considerations that arise in the context of using deep learning and statistical alignment for EEG decoding. In this regard, we study class-discriminative artifacts that can spuriously improve results for deep learning models, as well as the impact of class imbalance on alignment. We delineate a trade-off between increased classification accuracy when alignment is performed at later modeling stages, and susceptibility to class imbalance in the set of trials that the statistics are computed on.
Submitted 29 November, 2023; originally announced November 2023.
ACM Class: I.2.6
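As a rough illustration of statistical alignment over a subject's set of trials, the sketch below standardizes latent features per subject. This is the simplest possible stand-in (zero mean, unit variance per feature over a subject's trials) and should not be read as the paper's Latent Alignment method:

```python
# Toy per-subject alignment of latent features over that subject's trial set.
import numpy as np

rng = np.random.default_rng(0)

def latent_align(latents):
    """latents: (n_trials, d) features of ONE subject; align to zero mean/unit var."""
    mu = latents.mean(axis=0, keepdims=True)
    sd = latents.std(axis=0, keepdims=True) + 1e-8
    return (latents - mu) / sd

# Two subjects whose latent distributions differ by a shift and a scale:
subj_a = rng.normal(loc=0.0, scale=1.0, size=(100, 8))
subj_b = rng.normal(loc=3.0, scale=2.5, size=(100, 8))
aligned = [latent_align(s) for s in (subj_a, subj_b)]
print([round(float(s.mean()), 3) for s in aligned])  # both ~0 after alignment
```

Note how the abstract's class-imbalance caveat shows up here: the statistics are computed over whatever trials the subject happens to have, so a skewed class mix skews the alignment.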
7. arXiv:2309.14883 [pdf, other] cs.CV cs.LG
Locality-preserving Directions for Interpreting the Latent Space of Satellite Image GANs
Authors: Georgia Kourmouli, Nikos Kostagiolas, Yannis Panagakis, Mihalis A. Nicolaou
Abstract: We present a locality-aware method for interpreting the latent space of wavelet-based Generative Adversarial Networks (GANs) that can capture the large spatial and spectral variability characteristic of satellite imagery. By focusing on preserving locality, the proposed method is able to decompose the weight-space of pre-trained GANs and recover interpretable directions that correspond to high-level semantic concepts (such as urbanization, structure density, flora presence), which can subsequently be used for guided synthesis of satellite imagery. In contrast to typically used approaches that focus on capturing the variability of the weight-space in a reduced dimensionality space (i.e., based on Principal Component Analysis, PCA), we show that preserving locality leads to vectors with different angles, that are more robust to artifacts and can better preserve class information. Via a set of quantitative and qualitative examples, we further show that the proposed approach can outperform both baseline geometric augmentations, as well as global, PCA-based approaches for data synthesis in the context of data augmentation for satellite scene classification.
Submitted 26 September, 2023; originally announced September 2023.
8. arXiv:2309.11140 [pdf, other] cs.SD cs.LG eess.AS
Investigating Personalization Methods in Text to Music Generation
Authors: Manos Plitsis, Theodoros Kouzelis, Georgios Paraskevopoulos, Vassilis Katsouros, Yannis Panagakis
Abstract: In this work, we investigate the personalization of text-to-music diffusion models in a few-shot setting. Motivated by recent advances in the computer vision domain, we are the first to explore the combination of pre-trained text-to-audio diffusers with two established personalization methods. We experiment with the effect of audio-specific data augmentation on the overall system performance and assess different training strategies. For evaluation, we construct a novel dataset with prompts and music clips. We consider both embedding-based and music-specific metrics for quantitative evaluation, as well as a user study for qualitative evaluation. Our analysis shows that similarity metrics are in accordance with user preferences and that current personalization approaches tend to learn rhythmic music constructs more easily than melody. The code, dataset, and example material of this study are open to the research community.
Submitted 20 September, 2023; originally announced September 2023.
Comments: Submitted to ICASSP 2024, Examples at https://zelaki.github.io/
9. arXiv:2307.16584 [pdf, other] cs.SD cs.CV cs.LG eess.AS
Audio-visual video-to-speech synthesis with synthesized input audio
Authors: Triantafyllos Kefalas, Yannis Panagakis, Maja Pantic
Abstract: Video-to-speech synthesis involves reconstructing the speech signal of a speaker from a silent video. The implicit assumption of this task is that the sound signal is either missing or contains a high amount of noise/corruption such that it is not useful for processing. Previous works in the literature either use video inputs only or employ both video and audio inputs during training, and discard the input audio pathway during inference. In this work we investigate the effect of using video and audio inputs for video-to-speech synthesis during both training and inference. In particular, we use pre-trained video-to-speech models to synthesize the missing speech signals and then train an audio-visual-to-speech synthesis model, using both the silent video and the synthesized speech as inputs, to predict the final reconstructed speech. Our experiments demonstrate that this approach is successful with both raw waveforms and mel spectrograms as target outputs.
Submitted 31 July, 2023; originally announced July 2023.
Comments: This work has been submitted to the IEEE for possible publication
10. arXiv:2306.15464 [pdf, other] cs.SD cs.CV cs.LG eess.AS
Large-scale unsupervised audio pre-training for video-to-speech synthesis
Authors: Triantafyllos Kefalas, Yannis Panagakis, Maja Pantic
Abstract: Video-to-speech synthesis is the task of reconstructing the speech signal from a silent video of a speaker. Most established approaches to date involve a two-step process, whereby an intermediate representation from the video, such as a spectrogram, is extracted first and then passed to a vocoder to produce the raw audio. Some recent work has focused on end-to-end synthesis, whereby the generation of raw audio and any intermediate representations is performed jointly. All such approaches involve training on data from almost exclusively audio-visual datasets, i.e. every audio sample has a corresponding video sample. This precludes the use of abundant audio-only datasets which may not have a corresponding visual modality (e.g. audiobooks, radio podcasts, speech recognition datasets, etc.), as well as audio-only architectures that have been developed by the audio machine learning community over the years. In this paper we propose to train encoder-decoder models on more than 3,500 hours of audio data at 24kHz, and then use the pre-trained decoders to initialize the audio decoders for the video-to-speech synthesis task. The pre-training step uses audio samples only and does not require labels or corresponding samples from other modalities (visual, text). We demonstrate that this pre-training step improves the reconstructed speech and that it is an unexplored way to improve the quality of the generator in a cross-modal task while only requiring samples from one of the modalities. We conduct experiments using both raw audio and mel spectrograms as target outputs and benchmark our models against existing work.
Submitted 31 July, 2023; v1 submitted 27 June, 2023; originally announced June 2023.
Comments: Corrected typos. This work has been submitted to the IEEE for possible publication
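The pre-training recipe described above (train on audio only, then transplant the decoder into the cross-modal model) reduces to a weight-transfer step. A minimal sketch with stand-in modules; all shapes and module names are assumptions, not the paper's architecture:

```python
# Hedged sketch: pre-train an audio autoencoder, then reuse its decoder to
# initialize the decoder of a video-to-speech model. Training loops omitted.
import torch
import torch.nn as nn

audio_dim, latent_dim, video_dim = 256, 64, 128

def make_decoder():
    return nn.Sequential(nn.Linear(latent_dim, 512), nn.ReLU(), nn.Linear(512, audio_dim))

# Step 1: autoencoder pre-trained on abundant audio-only data.
audio_encoder = nn.Sequential(nn.Linear(audio_dim, 512), nn.ReLU(), nn.Linear(512, latent_dim))
audio_decoder = make_decoder()

# Step 2: video-to-speech model whose decoder is initialized from step 1.
video_encoder = nn.Sequential(nn.Linear(video_dim, 512), nn.ReLU(), nn.Linear(512, latent_dim))
v2s_decoder = make_decoder()
v2s_decoder.load_state_dict(audio_decoder.state_dict())   # the transfer step

speech = v2s_decoder(video_encoder(torch.randn(1, video_dim)))
print(speech.shape)   # torch.Size([1, 256])
```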
11. arXiv:2305.14053 [pdf, other] cs.CV cs.LG
Parts of Speech-Grounded Subspaces in Vision-Language Models
Authors: James Oldfield, Christos Tzelepis, Yannis Panagakis, Mihalis A. Nicolaou, Ioannis Patras
Abstract: Latent image representations arising from vision-language models have proved immensely useful for a variety of downstream tasks. However, their utility is limited by their entanglement with respect to different visual attributes. For instance, recent work has shown that CLIP image representations are often biased toward specific visual properties (such as objects or actions) in an unpredictable manner. In this paper, we propose to separate representations of the different visual modalities in CLIP's joint vision-language space by leveraging the association between parts of speech and specific visual modes of variation (e.g. nouns relate to objects, adjectives describe appearance). This is achieved by formulating an appropriate component analysis model that learns subspaces capturing variability corresponding to a specific part of speech, while jointly minimising variability to the rest. Such a subspace yields disentangled representations of the different visual properties of an image or text in closed form, while respecting the underlying geometry of the manifold on which the representations lie. What's more, we show the proposed model additionally facilitates learning subspaces corresponding to specific visual appearances (e.g. artists' painting styles), which enables the selective removal of entire visual themes from CLIP-based text-to-image synthesis. We validate the model both qualitatively, by visualising the subspace projections with a text-to-image model and by preventing the imitation of artists' styles, and quantitatively, through class invariance metrics and improvements to baseline zero-shot classification.
Submitted 12 November, 2023; v1 submitted 23 May, 2023; originally announced May 2023.
Comments: Accepted at NeurIPS 2023
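One classical way to realize "subspaces capturing variability corresponding to a specific part of speech, while jointly minimising variability to the rest" is a generalized eigenproblem on two covariance matrices. The sketch below uses synthetic data and is only an illustrative formulation, not the paper's component analysis model:

```python
# Hedged sketch: find directions w maximizing (w^T S1 w) / (w^T S2 w), i.e.
# variance of one group of embeddings relative to the rest.
import numpy as np

rng = np.random.default_rng(0)
d, k = 32, 4
X_nouns = rng.normal(size=(500, d)) @ rng.normal(size=(d, d))  # one part of speech
X_rest = rng.normal(size=(500, d))                             # everything else

S1 = np.cov(X_nouns, rowvar=False)                    # variability to capture
S2 = np.cov(X_rest, rowvar=False) + 1e-3 * np.eye(d)  # variability to suppress

# Top eigenvectors of S2^{-1} S1 solve the ratio-maximization problem:
eigvals, eigvecs = np.linalg.eig(np.linalg.solve(S2, S1))
order = np.argsort(eigvals.real)[::-1]
W = eigvecs.real[:, order[:k]]                        # (d, k) subspace basis

project = lambda X: X @ W                             # coordinates in that subspace
print(project(X_nouns).shape)                         # (500, 4)
```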
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05582v1-abstract-full').style.display = 'none'; document.getElementById('2303.05582v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02089">arXiv:2208.02089</a> <span> [<a href="https://arxiv.org/pdf/2208.02089">pdf</a>, <a href="https://arxiv.org/format/2208.02089">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3549737.3549777">10.1145/3549737.3549777 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Unsupervised Discovery of Semantic Concepts in Satellite Imagery with Style-based Wavelet-driven Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kostagiolas%2C+N">Nikos Kostagiolas</a>, <a href="/search/cs?searchtype=author&query=Nicolaou%2C+M+A">Mihalis A. Nicolaou</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02089v1-abstract-short" style="display: inline;"> In recent years, considerable advancements have been made in the area of Generative Adversarial Networks (GANs), particularly with the advent of style-based architectures that address many key shortcomings - both in terms of modeling capabilities and network interpretability. Despite these improvements, the adoption of such approaches in the domain of satellite imagery is not straightforward. Typi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02089v1-abstract-full').style.display = 'inline'; document.getElementById('2208.02089v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02089v1-abstract-full" style="display: none;"> In recent years, considerable advancements have been made in the area of Generative Adversarial Networks (GANs), particularly with the advent of style-based architectures that address many key shortcomings - both in terms of modeling capabilities and network interpretability. Despite these improvements, the adoption of such approaches in the domain of satellite imagery is not straightforward. Typical vision datasets used in generative tasks are well-aligned and annotated, and exhibit limited variability. 
In contrast, satellite imagery exhibits great spatial and spectral variability and a wide presence of fine, high-frequency details, while the tedious nature of annotating satellite imagery leads to annotation scarcity - further motivating developments in unsupervised learning. In this light, we present the first pre-trained style- and wavelet-based GAN model that can readily synthesize a wide gamut of realistic satellite images in a variety of settings and conditions - while also preserving high-frequency information. Furthermore, we show that by analyzing the intermediate activations of our network, one can discover a multitude of interpretable semantic directions that facilitate the guided synthesis of satellite images in terms of high-level concepts (e.g., urbanization) without using any form of supervision. Via a set of qualitative and quantitative experiments, we demonstrate the efficacy of our framework, in terms of suitability for downstream tasks (e.g., data augmentation), quality of synthetic imagery, as well as generalization capabilities to unseen datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02089v1-abstract-full').style.display = 'none'; document.getElementById('2208.02089v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 5 figures, accepted at SETN 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.00048">arXiv:2206.00048</a> <span> [<a href="https://arxiv.org/pdf/2206.00048">pdf</a>, <a href="https://arxiv.org/format/2206.00048">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PandA: Unsupervised Learning of Parts and Appearances in the Feature Maps of GANs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Oldfield%2C+J">James Oldfield</a>, <a href="/search/cs?searchtype=author&query=Tzelepis%2C+C">Christos Tzelepis</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Nicolaou%2C+M+A">Mihalis A. Nicolaou</a>, <a href="/search/cs?searchtype=author&query=Patras%2C+I">Ioannis Patras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.00048v2-abstract-short" style="display: inline;"> Recent advances in the understanding of Generative Adversarial Networks (GANs) have led to remarkable progress in visual editing and synthesis tasks, capitalizing on the rich semantics that are embedded in the latent spaces of pre-trained GANs. 
However, existing methods are often tailored to specific GAN architectures and either are limited to discovering global semantic directions that do not fac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00048v2-abstract-full').style.display = 'inline'; document.getElementById('2206.00048v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.00048v2-abstract-full" style="display: none;"> Recent advances in the understanding of Generative Adversarial Networks (GANs) have led to remarkable progress in visual editing and synthesis tasks, capitalizing on the rich semantics that are embedded in the latent spaces of pre-trained GANs. However, existing methods are often tailored to specific GAN architectures and either are limited to discovering global semantic directions that do not facilitate localized control, or require some form of supervision through manually provided regions or segmentation masks. In this light, we present an architecture-agnostic approach that jointly discovers factors representing spatial parts and their appearances in an entirely unsupervised fashion. These factors are obtained by applying a semi-nonnegative tensor factorization on the feature maps, which in turn enables context-aware local image editing with pixel-level control. In addition, we show that the discovered appearance factors correspond to saliency maps that localize concepts of interest, without using any labels. Experiments on a wide range of GAN architectures and datasets show that, in comparison to the state of the art, our method is far more efficient in terms of training time and, most importantly, provides much more accurate localized control. Our code is available at: https://github.com/james-oldfield/PandA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00048v2-abstract-full').style.display = 'none'; document.getElementById('2206.00048v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR 2023. 
Code available at: https://github.com/james-oldfield/PandA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.07050">arXiv:2205.07050</a> <span> [<a href="https://arxiv.org/pdf/2205.07050">pdf</a>, <a href="https://arxiv.org/format/2205.07050">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSP.2023.3272286">10.1109/TSP.2023.3272286 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DECONET: an Unfolding Network for Analysis-based Compressed Sensing with Generalization Error Bounds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kouni%2C+V">Vicky Kouni</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.07050v6-abstract-short" style="display: inline;"> We present a new deep unfolding network for analysis-sparsity-based Compressed Sensing. The proposed network, coined Decoding Network (DECONET), jointly learns a decoder that reconstructs vectors from their incomplete, noisy measurements and a redundant sparsifying analysis operator, which is shared across the layers of DECONET. Moreover, we formulate the hypothesis class of DECONET and estimate its… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07050v6-abstract-full').style.display = 'inline'; document.getElementById('2205.07050v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.07050v6-abstract-full" style="display: none;"> We present a new deep unfolding network for analysis-sparsity-based Compressed Sensing. The proposed network, coined Decoding Network (DECONET), jointly learns a decoder that reconstructs vectors from their incomplete, noisy measurements and a redundant sparsifying analysis operator, which is shared across the layers of DECONET. Moreover, we formulate the hypothesis class of DECONET and estimate its associated Rademacher complexity. Then, we use this estimate to deliver meaningful upper bounds for the generalization error of DECONET. Finally, the validity of our theoretical results is assessed and comparisons to state-of-the-art unfolding networks are made on both synthetic and real-world datasets. Experimental results indicate that our proposed network outperforms the baselines, consistently for all datasets, and its behaviour complies with our theoretical findings. 
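<p class="is-size-7">A rough sketch of a decoder of this kind, assuming a plain proximal-gradient unrolling with soft-thresholding and a single learnable analysis operator shared across layers; this illustrates the general recipe, not DECONET's exact ADMM recursion:</p> <pre><code># Unrolled analysis-sparsity decoder, sketched in PyTorch under simplifying
# assumptions (names, shapes and the update rule are illustrative).
import torch
import torch.nn as nn

class UnrolledAnalysisDecoder(nn.Module):
    def __init__(self, n, m, p, layers=10):
        super().__init__()
        self.Phi = nn.Parameter(torch.randn(p, n) / n ** 0.5)   # shared analysis op
        self.step = nn.Parameter(torch.full((layers,), 0.1))    # per-layer step size
        self.beta = nn.Parameter(torch.full((layers,), 0.1))    # per-layer penalty step
        self.thresh = nn.Parameter(torch.full((layers,), 0.01)) # per-layer threshold
        self.layers = layers

    def forward(self, y, A):
        """y: (batch, m) measurements, A: (m, n) sensing matrix."""
        x = y @ A                                  # adjoint as a cheap init
        for t in range(self.layers):
            grad = (x @ A.T - y) @ A               # gradient of 0.5*||Ax - y||^2
            z = x - self.step[t] * grad
            c = z @ self.Phi.T                     # analysis coefficients
            u = torch.sign(c) * torch.clamp(c.abs() - self.thresh[t], min=0.0)
            x = z - self.beta[t] * (c - u) @ self.Phi  # pull Phi x toward sparse u
        return x

# dec = UnrolledAnalysisDecoder(n=128, m=32, p=192, layers=12)
# x_hat = dec(torch.randn(4, 32), torch.randn(32, 128) / 32 ** 0.5)</code></pre>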
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07050v6-abstract-full').style.display = 'none'; document.getElementById('2205.07050v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE Transactions on Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.12950">arXiv:2202.12950</a> <span> [<a href="https://arxiv.org/pdf/2202.12950">pdf</a>, <a href="https://arxiv.org/format/2202.12950">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> 2021 BEETL Competition: Advancing Transfer Learning for Subject Independence & Heterogenous EEG Data Sets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+X">Xiaoxi Wei</a>, <a href="/search/cs?searchtype=author&query=Faisal%2C+A+A">A. Aldo Faisal</a>, <a href="/search/cs?searchtype=author&query=Grosse-Wentrup%2C+M">Moritz Grosse-Wentrup</a>, <a href="/search/cs?searchtype=author&query=Gramfort%2C+A">Alexandre Gramfort</a>, <a href="/search/cs?searchtype=author&query=Chevallier%2C+S">Sylvain Chevallier</a>, <a href="/search/cs?searchtype=author&query=Jayaram%2C+V">Vinay Jayaram</a>, <a href="/search/cs?searchtype=author&query=Jeunet%2C+C">Camille Jeunet</a>, <a href="/search/cs?searchtype=author&query=Bakas%2C+S">Stylianos Bakas</a>, <a href="/search/cs?searchtype=author&query=Ludwig%2C+S">Siegfried Ludwig</a>, <a href="/search/cs?searchtype=author&query=Barmpas%2C+K">Konstantinos Barmpas</a>, <a href="/search/cs?searchtype=author&query=Bahri%2C+M">Mehdi Bahri</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Laskaris%2C+N">Nikolaos Laskaris</a>, <a href="/search/cs?searchtype=author&query=Adamos%2C+D+A">Dimitrios A. Adamos</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a>, <a href="/search/cs?searchtype=author&query=Duong%2C+W+C">William C. Duong</a>, <a href="/search/cs?searchtype=author&query=Gordon%2C+S+M">Stephen M. Gordon</a>, <a href="/search/cs?searchtype=author&query=Lawhern%2C+V+J">Vernon J. 
Lawhern</a>, <a href="/search/cs?searchtype=author&query=%C5%9Aliwowski%2C+M">Maciej Śliwowski</a>, <a href="/search/cs?searchtype=author&query=Rouanne%2C+V">Vincent Rouanne</a>, <a href="/search/cs?searchtype=author&query=Tempczyk%2C+P">Piotr Tempczyk</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.12950v1-abstract-short" style="display: inline;"> Transfer learning and meta-learning offer some of the most promising avenues to unlock the scalability of healthcare and consumer technologies driven by biosignal data. This is because current methods cannot generalise well across human subjects' data and handle learning from different heterogeneously collected data sets, thus limiting the scale of training data. On the other hand, developments in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12950v1-abstract-full').style.display = 'inline'; document.getElementById('2202.12950v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.12950v1-abstract-full" style="display: none;"> Transfer learning and meta-learning offer some of the most promising avenues to unlock the scalability of healthcare and consumer technologies driven by biosignal data. This is because current methods cannot generalise well across human subjects' data and handle learning from different heterogeneously collected data sets, thus limiting the scale of training data. On the other hand, developments in transfer learning would benefit significantly from a real-world benchmark with immediate practical application. Therefore, we pick electroencephalography (EEG) as an exemplar for what makes biosignal machine learning hard. We design two transfer learning challenges around diagnostics and Brain-Computer-Interfacing (BCI) that have to be solved in the face of low signal-to-noise ratios, major variability among subjects, differences in the data recording sessions and techniques, and even between the specific BCI tasks recorded in the dataset. Task 1 is centred on the field of medical diagnostics, addressing automatic sleep stage annotation across subjects. Task 2 is centred on Brain-Computer Interfacing (BCI), addressing motor imagery decoding across both subjects and data sets. The BEETL competition with its over 30 competing teams and its 3 winning entries brought attention to the potential of deep transfer learning and combinations of set theory and conventional machine learning techniques to overcome the challenges. The results set a new state-of-the-art for the real-world BEETL benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12950v1-abstract-full').style.display = 'none'; document.getElementById('2202.12950v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">PrePrint of the NeurIPS2021 BEETL Competition Submitted to Proceedings of Machine Learning Research (PMLR)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.03267">arXiv:2202.03267</a> <span> [<a href="https://arxiv.org/pdf/2202.03267">pdf</a>, <a href="https://arxiv.org/format/2202.03267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Team Cogitat at NeurIPS 2021: Benchmarks for EEG Transfer Learning Competition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bakas%2C+S">Stylianos Bakas</a>, <a href="/search/cs?searchtype=author&query=Ludwig%2C+S">Siegfried Ludwig</a>, <a href="/search/cs?searchtype=author&query=Barmpas%2C+K">Konstantinos Barmpas</a>, <a href="/search/cs?searchtype=author&query=Bahri%2C+M">Mehdi Bahri</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Laskaris%2C+N">Nikolaos Laskaris</a>, <a href="/search/cs?searchtype=author&query=Adamos%2C+D+A">Dimitrios A. Adamos</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.03267v1-abstract-short" style="display: inline;"> Building subject-independent deep learning models for EEG decoding faces the challenge of strong covariate-shift across different datasets, subjects and recording sessions. Our approach to address this difficulty is to explicitly align feature distributions at various layers of the deep learning model, using both simple statistical techniques as well as trainable methods with more representational… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.03267v1-abstract-full').style.display = 'inline'; document.getElementById('2202.03267v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.03267v1-abstract-full" style="display: none;"> Building subject-independent deep learning models for EEG decoding faces the challenge of strong covariate-shift across different datasets, subjects and recording sessions. Our approach to address this difficulty is to explicitly align feature distributions at various layers of the deep learning model, using both simple statistical techniques as well as trainable methods with more representational capacity. This follows in a similar vein as covariance-based alignment methods, often used in a Riemannian manifold context. The methodology proposed herein won first place in the 2021 Benchmarks in EEG Transfer Learning (BEETL) competition, hosted at the NeurIPS conference. 
The first task of the competition consisted of sleep stage classification, which required the transfer of models trained on younger subjects to perform inference on multiple subjects of older age groups without personalized calibration data, requiring subject-independent models. The second task required transferring models trained on the subjects of one or more source motor imagery datasets to perform inference on two target datasets, providing a small set of personalized calibration data for multiple test subjects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.03267v1-abstract-full').style.display = 'none'; document.getElementById('2202.03267v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.12911">arXiv:2112.12911</a> <span> [<a href="https://arxiv.org/pdf/2112.12911">pdf</a>, <a href="https://arxiv.org/format/2112.12911">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cluster-guided Image Synthesis with Unconditional Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Oldfield%2C+J">James Oldfield</a>, <a href="/search/cs?searchtype=author&query=Chrysos%2C+G+G">Grigorios G Chrysos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.12911v1-abstract-short" style="display: inline;"> Generative Adversarial Networks (GANs) are the driving force behind the state-of-the-art in image generation. Despite their ability to synthesize high-resolution photo-realistic images, generating content with on-demand conditioning of different granularity remains a challenge. This challenge is usually tackled by annotating massive datasets with the attributes of interest, a laborious task that i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.12911v1-abstract-full').style.display = 'inline'; document.getElementById('2112.12911v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.12911v1-abstract-full" style="display: none;"> Generative Adversarial Networks (GANs) are the driving force behind the state-of-the-art in image generation. Despite their ability to synthesize high-resolution photo-realistic images, generating content with on-demand conditioning of different granularity remains a challenge. This challenge is usually tackled by annotating massive datasets with the attributes of interest, a laborious task that is not always a viable option. 
Therefore, it is vital to introduce control into the generation process of unsupervised generative models. In this work, we focus on controllable image generation by leveraging GANs that are well-trained in an unsupervised fashion. To this end, we discover that the representation space of intermediate layers of the generator forms a number of clusters that separate the data according to semantically meaningful attributes (e.g., hair color and pose). By conditioning on the cluster assignments, the proposed method is able to control the semantic class of the generated image. Our approach enables sampling from each cluster by Implicit Maximum Likelihood Estimation (IMLE). We showcase the efficacy of our approach on faces (CelebA-HQ and FFHQ), animals (Imagenet) and objects (LSUN) using different pre-trained generative models. The results highlight the ability of our approach to condition image generation on attributes like gender, pose and hair style on faces, as well as a variety of features on different object classes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.12911v1-abstract-full').style.display = 'none'; document.getElementById('2112.12911v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.11736">arXiv:2111.11736</a> <span> [<a href="https://arxiv.org/pdf/2111.11736">pdf</a>, <a href="https://arxiv.org/format/2111.11736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Tensor Component Analysis for Interpreting the Latent Space of GANs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Oldfield%2C+J">James Oldfield</a>, <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Nicolaou%2C+M+A">Mihalis A. Nicolaou</a>, <a href="/search/cs?searchtype=author&query=Patras%2C+I">Ioannis Patras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.11736v1-abstract-short" style="display: inline;"> This paper addresses the problem of finding interpretable directions in the latent space of pre-trained Generative Adversarial Networks (GANs) to facilitate controllable image synthesis. Such interpretable directions correspond to transformations that can affect both the style and geometry of the synthetic images. 
However, existing approaches that utilise linear techniques to find these transforma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11736v1-abstract-full').style.display = 'inline'; document.getElementById('2111.11736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.11736v1-abstract-full" style="display: none;"> This paper addresses the problem of finding interpretable directions in the latent space of pre-trained Generative Adversarial Networks (GANs) to facilitate controllable image synthesis. Such interpretable directions correspond to transformations that can affect both the style and geometry of the synthetic images. However, existing approaches that utilise linear techniques to find these transformations often fail to provide an intuitive way to separate these two sources of variation. To address this, we propose to a) perform a multilinear decomposition of the tensor of intermediate representations, and b) use a tensor-based regression to map directions found using this decomposition to the latent space. Our scheme allows for both linear edits corresponding to the individual modes of the tensor, and non-linear ones that model the multiplicative interactions between them. We show experimentally that we can utilise the former to better separate style- from geometry-based transformations, and the latter to generate an extended set of possible transformations in comparison to prior works. We demonstrate our approach's efficacy both quantitatively and qualitatively compared to the current state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11736v1-abstract-full').style.display = 'none'; document.getElementById('2111.11736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. 
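<p class="is-size-7">The multilinear step above can be illustrated in a few lines with TensorLy; the activation tensor below is random and purely hypothetical, and the Tucker ranks are arbitrary:</p> <pre><code># Tucker decomposition of a (hypothetical) tensor of intermediate GAN
# activations; per-mode factors separate different modes of variation.
import numpy as np
import tensorly as tl
from tensorly.decomposition import tucker

acts = np.random.randn(500, 8, 8, 64)            # samples x H x W x channels
core, factors = tucker(tl.tensor(acts), rank=[20, 4, 4, 16])
U_sample, U_h, U_w, U_chan = factors
# Columns of U_chan span channel-wise ("style"-like) modes of variation,
# while U_h and U_w capture spatial ("geometry"-like) structure; moving
# along one column with the others fixed gives a single-mode edit.</code></pre>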
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BMVC 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.13859">arXiv:2110.13859</a> <span> [<a href="https://arxiv.org/pdf/2110.13859">pdf</a>, <a href="https://arxiv.org/format/2110.13859">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Defensive Tensorization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bulat%2C+A">Adrian Bulat</a>, <a href="/search/cs?searchtype=author&query=Kossaifi%2C+J">Jean Kossaifi</a>, <a href="/search/cs?searchtype=author&query=Bhattacharya%2C+S">Sourav Bhattacharya</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Hospedales%2C+T">Timothy Hospedales</a>, <a href="/search/cs?searchtype=author&query=Tzimiropoulos%2C+G">Georgios Tzimiropoulos</a>, <a href="/search/cs?searchtype=author&query=Lane%2C+N+D">Nicholas D Lane</a>, <a href="/search/cs?searchtype=author&query=Pantic%2C+M">Maja Pantic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.13859v1-abstract-short" style="display: inline;"> We propose defensive tensorization, an adversarial defence technique that leverages a latent high-order factorization of the network. The layers of a network are first expressed as factorized tensor layers. Tensor dropout is then applied in the latent subspace, therefore resulting in dense reconstructed weights, without the sparsity or perturbations typically induced by the randomization.Our appro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13859v1-abstract-full').style.display = 'inline'; document.getElementById('2110.13859v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.13859v1-abstract-full" style="display: none;"> We propose defensive tensorization, an adversarial defence technique that leverages a latent high-order factorization of the network. The layers of a network are first expressed as factorized tensor layers. Tensor dropout is then applied in the latent subspace, therefore resulting in dense reconstructed weights, without the sparsity or perturbations typically induced by the randomization.Our approach can be readily integrated with any arbitrary neural architecture and combined with techniques like adversarial training. We empirically demonstrate the effectiveness of our approach on standard image classification benchmarks. We validate the versatility of our approach across domains and low-precision architectures by considering an audio classification task and binary networks. In all cases, we demonstrate improved performance compared to prior works. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13859v1-abstract-full').style.display = 'none'; document.getElementById('2110.13859v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be presented at BMVC 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.10009">arXiv:2110.10009</a> <span> [<a href="https://arxiv.org/pdf/2110.10009">pdf</a>, <a href="https://arxiv.org/format/2110.10009">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> EEGminer: Discovering Interpretable Features of Brain Activity with Learnable Filters </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ludwig%2C+S">Siegfried Ludwig</a>, <a href="/search/cs?searchtype=author&query=Bakas%2C+S">Stylianos Bakas</a>, <a href="/search/cs?searchtype=author&query=Adamos%2C+D+A">Dimitrios A. Adamos</a>, <a href="/search/cs?searchtype=author&query=Laskaris%2C+N">Nikolaos Laskaris</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.10009v2-abstract-short" style="display: inline;"> Patterns of brain activity are associated with different brain processes and can be used to identify different brain states and make behavioral predictions. However, the relevant features are not readily apparent and accessible. To mine informative latent representations from multichannel recordings of ongoing EEG activity, we propose a novel differentiable decoding pipeline consisting of learnabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10009v2-abstract-full').style.display = 'inline'; document.getElementById('2110.10009v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.10009v2-abstract-full" style="display: none;"> Patterns of brain activity are associated with different brain processes and can be used to identify different brain states and make behavioral predictions. However, the relevant features are not readily apparent and accessible. To mine informative latent representations from multichannel recordings of ongoing EEG activity, we propose a novel differentiable decoding pipeline consisting of learnable filters and a pre-determined feature extraction module. Specifically, we introduce filters parameterized by generalized Gaussian functions that offer a smooth derivative for stable end-to-end model training and allow for learning interpretable features. 
For the feature module, we use signal magnitude and functional connectivity estimates. We demonstrate the utility of our model towards emotion recognition from EEG signals on the SEED dataset, as well as on a new EEG dataset of unprecedented size (i.e., 761 subjects), where we identify consistent trends of music perception and related individual differences. The discovered features align with previous neuroscience studies and offer new insights, such as marked differences in the functional connectivity profile between left and right temporal areas during music listening. This agrees with the respective specialisation of the temporal lobes regarding music perception proposed in the literature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10009v2-abstract-full').style.display = 'none'; document.getElementById('2110.10009v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.03436">arXiv:2107.03436</a> <span> [<a href="https://arxiv.org/pdf/2107.03436">pdf</a>, <a href="https://arxiv.org/format/2107.03436">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JPROC.2021.3074329">10.1109/JPROC.2021.3074329 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Tensor Methods in Computer Vision and Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Kossaifi%2C+J">Jean Kossaifi</a>, <a href="/search/cs?searchtype=author&query=Chrysos%2C+G+G">Grigorios G. Chrysos</a>, <a href="/search/cs?searchtype=author&query=Oldfield%2C+J">James Oldfield</a>, <a href="/search/cs?searchtype=author&query=Nicolaou%2C+M+A">Mihalis A. Nicolaou</a>, <a href="/search/cs?searchtype=author&query=Anandkumar%2C+A">Anima Anandkumar</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.03436v1-abstract-short" style="display: inline;"> Tensors, or multidimensional arrays, are data structures that can naturally represent visual data of multiple dimensions. 
Inherently able to efficiently capture structured, latent semantic spaces and high-order interactions, tensors have a long history of applications in a wide span of computer vision problems. With the advent of the deep learning paradigm shift in computer vision, tensors have be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.03436v1-abstract-full').style.display = 'inline'; document.getElementById('2107.03436v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.03436v1-abstract-full" style="display: none;"> Tensors, or multidimensional arrays, are data structures that can naturally represent visual data of multiple dimensions. Inherently able to efficiently capture structured, latent semantic spaces and high-order interactions, tensors have a long history of applications in a wide span of computer vision problems. With the advent of the deep learning paradigm shift in computer vision, tensors have become even more fundamental. Indeed, essential ingredients in modern deep learning architectures, such as convolutions and attention mechanisms, can readily be considered as tensor mappings. In effect, tensor methods are increasingly finding significant applications in deep learning, including the design of memory and compute efficient network architectures, improving robustness to random noise and adversarial attacks, and aiding the theoretical understanding of deep networks. This article provides an in-depth and practical review of tensors and tensor methods in the context of representation learning and deep learning, with a particular focus on visual data analysis and computer vision applications. Concretely, besides fundamental work in tensor-based visual data analysis methods, we focus on recent developments that have brought on a gradual increase of tensor methods, especially in deep learning architectures, and their implications in computer vision applications. To further enable the newcomer to grasp such concepts quickly, we provide companion Python notebooks, covering key aspects of the paper and implementing them, step-by-step with TensorLy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.03436v1-abstract-full').style.display = 'none'; document.getElementById('2107.03436v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. 
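<p class="is-size-7">In the spirit of the companion notebooks mentioned in the abstract, a few TensorLy basics that such a review builds on (random data, illustrative only):</p> <pre><code># Unfolding, CP decomposition and reconstruction with TensorLy.
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac

X = tl.tensor(np.random.randn(30, 40, 50))   # a third-order data tensor
X0 = tl.unfold(X, mode=0)                    # mode-0 unfolding: shape (30, 2000)
weights, factors = parafac(X, rank=5)        # CP (PARAFAC) decomposition
X_hat = tl.cp_to_tensor((weights, factors))  # low-rank reconstruction
err = tl.norm(X - X_hat) / tl.norm(X)        # relative approximation error
print(f"relative error at rank 5: {err:.3f}")</code></pre>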
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the IEEE (2021)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.07916">arXiv:2104.07916</a> <span> [<a href="https://arxiv.org/pdf/2104.07916">pdf</a>, <a href="https://arxiv.org/format/2104.07916">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Augmenting Deep Classifiers with Polynomial Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chrysos%2C+G+G">Grigorios G Chrysos</a>, <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiankang Deng</a>, <a href="/search/cs?searchtype=author&query=Kossaifi%2C+J">Jean Kossaifi</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Anandkumar%2C+A">Anima Anandkumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.07916v2-abstract-short" style="display: inline;"> Deep neural networks have been the driving force behind the success in classification tasks, e.g., object and audio recognition. Impressive results and generalization have been achieved by a variety of recently proposed architectures, the majority of which are seemingly disconnected. In this work, we cast the study of deep classifiers under a unifying framework. In particular, we express state-of-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07916v2-abstract-full').style.display = 'inline'; document.getElementById('2104.07916v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.07916v2-abstract-full" style="display: none;"> Deep neural networks have been the driving force behind the success in classification tasks, e.g., object and audio recognition. Impressive results and generalization have been achieved by a variety of recently proposed architectures, the majority of which are seemingly disconnected. In this work, we cast the study of deep classifiers under a unifying framework. In particular, we express state-of-the-art architectures (e.g., residual and non-local networks) in the form of different degree polynomials of the input. Our framework provides insights on the inductive biases of each model and enables natural extensions building upon their polynomial nature. The efficacy of the proposed models is evaluated on standard image and audio classification benchmarks. The expressivity of the proposed models is highlighted both in terms of increased model performance as well as model compression. Lastly, the extensions allowed by this taxonomy showcase benefits in the presence of limited data and long-tailed data distributions. We expect this taxonomy to provide links between existing domain-specific architectures. The source code is available at \url{https://github.com/grigorisg9gr/polynomials-for-augmenting-NNs}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07916v2-abstract-full').style.display = 'none'; document.getElementById('2104.07916v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ECCV'22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.05077">arXiv:2104.05077</a> <span> [<a href="https://arxiv.org/pdf/2104.05077">pdf</a>, <a href="https://arxiv.org/format/2104.05077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CoPE: Conditional image generation using Polynomial Expansions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chrysos%2C+G+G">Grigorios G Chrysos</a>, <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.05077v3-abstract-short" style="display: inline;"> Generative modeling has evolved to a notable field of machine learning. Deep polynomial neural networks (PNNs) have demonstrated impressive results in unsupervised image generation, where the task is to map an input vector (i.e., noise) to a synthesized image. However, the success of PNNs has not been replicated in conditional generation tasks, such as super-resolution. Existing PNNs focus on sing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.05077v3-abstract-full').style.display = 'inline'; document.getElementById('2104.05077v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.05077v3-abstract-full" style="display: none;"> Generative modeling has evolved to a notable field of machine learning. Deep polynomial neural networks (PNNs) have demonstrated impressive results in unsupervised image generation, where the task is to map an input vector (i.e., noise) to a synthesized image. However, the success of PNNs has not been replicated in conditional generation tasks, such as super-resolution. Existing PNNs focus on single-variable polynomial expansions which do not fare well to two-variable inputs, i.e., the noise variable and the conditional variable. In this work, we introduce a general framework, called CoPE, that enables a polynomial expansion of two input variables and captures their auto- and cross-correlations. We exhibit how CoPE can be trivially augmented to accept an arbitrary number of input variables. 
CoPE is evaluated in five tasks (class-conditional generation, inverse problems, edges-to-image translation, image-to-image translation, attribute-guided generation) involving eight datasets. The thorough evaluation suggests that CoPE can be useful for tackling diverse conditional generation tasks. The source code of CoPE is available at \url{https://github.com/grigorisg9gr/polynomial_nets_for_conditional_generation}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.05077v3-abstract-full').style.display = 'none'; document.getElementById('2104.05077v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.04075">arXiv:2009.04075</a> <span> [<a href="https://arxiv.org/pdf/2009.04075">pdf</a>, <a href="https://arxiv.org/format/2009.04075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Multilinear Latent Conditioning for Generating Unseen Attribute Combinations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Chrysos%2C+G">Grigorios Chrysos</a>, <a href="/search/cs?searchtype=author&query=Pantic%2C+M">Maja Pantic</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.04075v1-abstract-short" style="display: inline;"> Deep generative models rely on their inductive bias to facilitate generalization, especially for problems with high dimensional data, like images. However, empirical studies have shown that variational autoencoders (VAE) and generative adversarial networks (GAN) lack the generalization ability that occurs naturally in human perception. For example, humans can visualize a woman smiling after only s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.04075v1-abstract-full').style.display = 'inline'; document.getElementById('2009.04075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.04075v1-abstract-full" style="display: none;"> Deep generative models rely on their inductive bias to facilitate generalization, especially for problems with high dimensional data, like images. However, empirical studies have shown that variational autoencoders (VAE) and generative adversarial networks (GAN) lack the generalization ability that occurs naturally in human perception. 
For example, humans can visualize a woman smiling after only seeing a smiling man. On the contrary, the standard conditional VAE (cVAE) is unable to generate unseen attribute combinations. To this end, we extend cVAE by introducing a multilinear latent conditioning framework that captures the multiplicative interactions between the attributes. We implement two variants of our model and demonstrate their efficacy on MNIST, Fashion-MNIST and CelebA. Altogether, we design a novel conditioning framework that can be used with any architecture to synthesize unseen attribute combinations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.04075v1-abstract-full').style.display = 'none'; document.getElementById('2009.04075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">published at International Conference on Machine Learning 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.13026">arXiv:2006.13026</a> <span> [<a href="https://arxiv.org/pdf/2006.13026">pdf</a>, <a href="https://arxiv.org/format/2006.13026">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TPAMI.2021.3058891">10.1109/TPAMI.2021.3058891 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Polynomial Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chrysos%2C+G">Grigorios Chrysos</a>, <a href="/search/cs?searchtype=author&query=Moschoglou%2C+S">Stylianos Moschoglou</a>, <a href="/search/cs?searchtype=author&query=Bouritsas%2C+G">Giorgos Bouritsas</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiankang Deng</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.13026v2-abstract-short" style="display: inline;"> Deep Convolutional Neural Networks (DCNNs) are currently the method of choice both for generative, as well as for discriminative learning in computer vision and machine learning. The success of DCNNs can be attributed to the careful selection of their building blocks (e.g., residual blocks, rectifiers, sophisticated normalization schemes, to mention but a few). 
arXiv:2006.13026 [pdf, other] (cs.LG, cs.CV, stat.ML) doi: 10.1109/TPAMI.2021.3058891
Deep Polynomial Neural Networks
Authors: Grigorios Chrysos, Stylianos Moschoglou, Giorgos Bouritsas, Jiankang Deng, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Deep Convolutional Neural Networks (DCNNs) are currently the method of choice both for generative and for discriminative learning in computer vision and machine learning. The success of DCNNs can be attributed to the careful selection of their building blocks (e.g., residual blocks, rectifiers, sophisticated normalization schemes, to mention but a few). In this paper, we propose $Π$-Nets, a new class of function approximators based on polynomial expansions. $Π$-Nets are polynomial neural networks, i.e., the output is a high-order polynomial of the input. The unknown parameters, which are naturally represented by high-order tensors, are estimated through a collective tensor factorization with factor sharing. We introduce three tensor decompositions that significantly reduce the number of parameters and show how they can be efficiently implemented by hierarchical neural networks. We empirically demonstrate that $Π$-Nets are very expressive and even produce good results without the use of non-linear activation functions in a large battery of tasks and signals, i.e., images, graphs, and audio. When used in conjunction with activation functions, $Π$-Nets produce state-of-the-art results in three challenging tasks, i.e., image generation, face verification and 3D mesh representation learning. The source code is available at \url{https://github.com/grigorisg9gr/polynomial_nets}.
Submitted 27 February, 2021; v1 submitted 20 June, 2020; originally announced June 2020.
Comments: Published in IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI). Code: https://github.com/grigorisg9gr/polynomial_nets. arXiv admin note: substantial text overlap with arXiv:2003.03828
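To illustrate how a high-order polynomial of the input can be realized by a hierarchical network without activation functions, here is a minimal sketch in the spirit of the factorizations above; the recursion, layer sizes, and class name are illustrative assumptions, not the official implementation.

import torch.nn as nn

class PolyNetSketch(nn.Module):
    """Sketch: the output is a degree-N polynomial of the input z."""
    def __init__(self, in_dim, hidden_dim, out_dim, degree=3):
        super().__init__()
        self.U = nn.ModuleList(
            [nn.Linear(in_dim, hidden_dim, bias=False) for _ in range(degree)]
        )
        self.head = nn.Linear(hidden_dim, out_dim)

    def forward(self, z):
        x = self.U[0](z)  # degree-1 term
        for U_n in self.U[1:]:
            # The Hadamard product raises the polynomial degree by one;
            # the skip connection keeps all lower-order terms.
            x = U_n(z) * x + x
        return self.head(x)

Each multiplicative step plays the role of one factor in a CP-style decomposition of the otherwise intractable high-order weight tensor, which keeps the parameter count linear in the degree.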
arXiv:2006.03985 [pdf, other] (cs.CV)
Enhancing Facial Data Diversity with Style-based Face Aging
Authors: Markos Georgopoulos, James Oldfield, Mihalis A. Nicolaou, Yannis Panagakis, Maja Pantic
Abstract: A significant limiting factor in training fair classifiers relates to the presence of dataset bias. In particular, face datasets are typically biased in terms of attributes such as gender, age, and race. If not mitigated, bias leads to algorithms that exhibit unfair behaviour towards such groups. In this work, we address the problem of increasing the diversity of face datasets with respect to age. Concretely, we propose a novel, generative style-based architecture for data augmentation that captures fine-grained aging patterns by conditioning on multi-resolution age-discriminative representations. By evaluating on several age-annotated datasets in both single- and cross-database experiments, we show that the proposed method outperforms state-of-the-art algorithms for age transfer, especially in the case of age groups that lie in the tails of the label distribution. We further show significantly increased diversity in the augmented datasets, outperforming all compared methods according to established metrics.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.03985v1-abstract-full').style.display = 'none'; document.getElementById('2006.03985v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE CVPR 2020 WORKSHOP ON FAIR, DATA EFFICIENT AND TRUSTED COMPUTER VISION</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.07302">arXiv:2005.07302</a> <span> [<a href="https://arxiv.org/pdf/2005.07302">pdf</a>, <a href="https://arxiv.org/format/2005.07302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Investigating Bias in Deep Face Analysis: The KANFace Dataset and Empirical Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Pantic%2C+M">Maja Pantic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.07302v2-abstract-short" style="display: inline;"> Deep learning-based methods have pushed the limits of the state-of-the-art in face analysis. However, despite their success, these models have raised concerns regarding their bias towards certain demographics. This bias is inflicted both by limited diversity across demographics in the training set, as well as the design of the algorithms. In this work, we investigate the demographic bias of deep l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07302v2-abstract-full').style.display = 'inline'; document.getElementById('2005.07302v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.07302v2-abstract-full" style="display: none;"> Deep learning-based methods have pushed the limits of the state-of-the-art in face analysis. However, despite their success, these models have raised concerns regarding their bias towards certain demographics. This bias is inflicted both by limited diversity across demographics in the training set, as well as the design of the algorithms. In this work, we investigate the demographic bias of deep learning models in face recognition, age estimation, gender recognition and kinship verification. To this end, we introduce the most comprehensive, large-scale dataset of facial images and videos to date. It consists of 40K still images and 44K sequences (14.5M video frames in total) captured in unconstrained, real-world conditions from 1,045 subjects. The data are manually annotated in terms of identity, exact age, gender and kinship. The performance of state-of-the-art models is scrutinized and demographic bias is exposed by conducting a series of experiments. 
Lastly, a method to debias network embeddings is introduced and tested on the proposed benchmarks.
Submitted 8 September, 2020; v1 submitted 14 May, 2020; originally announced May 2020.

arXiv:2003.03828 [pdf, other] (cs.LG, cs.CV, stat.ML)
$Π$-nets: Deep Polynomial Neural Networks
Authors: Grigorios G. Chrysos, Stylianos Moschoglou, Giorgos Bouritsas, Yannis Panagakis, Jiankang Deng, Stefanos Zafeiriou
Abstract: Deep Convolutional Neural Networks (DCNNs) are currently the method of choice both for generative and for discriminative learning in computer vision and machine learning. The success of DCNNs can be attributed to the careful selection of their building blocks (e.g., residual blocks, rectifiers, sophisticated normalization schemes, to mention but a few). In this paper, we propose $Π$-Nets, a new class of DCNNs. $Π$-Nets are polynomial neural networks, i.e., the output is a high-order polynomial of the input. $Π$-Nets can be implemented using a special kind of skip connection, and their parameters can be represented via high-order tensors. We empirically demonstrate that $Π$-Nets have better representation power than standard DCNNs and even produce good results without the use of non-linear activation functions in a large battery of tasks and signals, i.e., images, graphs, and audio. When used in conjunction with activation functions, $Π$-Nets produce state-of-the-art results in challenging tasks, such as image generation. Lastly, our framework elucidates why recent generative models, such as StyleGAN, improve upon their predecessors, e.g., ProGAN.
Submitted 26 March, 2020; v1 submitted 8 March, 2020; originally announced March 2020.
Comments: Accepted in CVPR 2020

arXiv:1912.05833 [pdf, other] (cs.LG, eess.AS, stat.ML)
Speech-driven facial animation using polynomial fusion of features
Authors: Triantafyllos Kefalas, Konstantinos Vougioukas, Yannis Panagakis, Stavros Petridis, Jean Kossaifi, Maja Pantic
Abstract: Speech-driven facial animation involves using a speech signal to generate realistic videos of talking faces. Recent deep learning approaches to facial synthesis rely on extracting low-dimensional representations and concatenating them, followed by a decoding step of the concatenated vector. This accounts for only first-order interactions of the features and ignores higher-order interactions. In this paper we propose a polynomial fusion layer that models the joint representation of the encodings by a higher-order polynomial, with the parameters modelled by a tensor decomposition. We demonstrate the suitability of this approach through experiments on generated videos evaluated on a range of metrics on video quality, audiovisual synchronisation and generation of blinks.
Submitted 19 February, 2020; v1 submitted 12 December, 2019; originally announced December 2019.
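As a rough illustration of fusing two encodings through a higher-order polynomial with factorized parameters (the module name, rank, and dimensions below are assumptions; the paper's layer may differ in polynomial order and decomposition):

import torch.nn as nn

class PolyFusionSketch(nn.Module):
    """Second-order fusion of audio and identity encodings via CP-factorized weights."""
    def __init__(self, d_audio, d_identity, d_out, rank=64):
        super().__init__()
        self.proj_a = nn.Linear(d_audio, rank, bias=False)
        self.proj_i = nn.Linear(d_identity, rank, bias=False)
        self.out = nn.Linear(rank, d_out)

    def forward(self, a, i):
        # The Hadamard product in rank space models multiplicative audio-identity
        # interactions that plain concatenation (first-order fusion) cannot capture.
        return self.out(self.proj_a(a) * self.proj_i(i))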
arXiv:1908.06571 [pdf, other] (cs.LG, stat.ML)
PolyGAN: High-Order Polynomial Generators
Authors: Grigorios Chrysos, Stylianos Moschoglou, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Generative Adversarial Networks (GANs) have become the gold standard when it comes to learning generative models for high-dimensional distributions. Since their advent, numerous variations of GANs have been introduced in the literature, primarily focusing on utilization of novel loss functions, optimization/regularization strategies and network architectures. In this paper, we turn our attention to the generator and investigate the use of high-order polynomials as an alternative class of universal function approximators. Concretely, we propose PolyGAN, where we model the data generator by means of a high-order polynomial whose unknown parameters are naturally represented by high-order tensors. We introduce two tensor decompositions that significantly reduce the number of parameters and show how they can be efficiently implemented by hierarchical neural networks that only employ linear/convolutional blocks. We exhibit for the first time that by using our approach a GAN generator can approximate the data distribution without using any activation functions. Thorough experimental evaluation on both synthetic and real data (images and 3D point clouds) demonstrates the merits of PolyGAN against the state of the art.
Submitted 13 October, 2019; v1 submitted 18 August, 2019; originally announced August 2019.
arXiv:1906.06196 [pdf, other] (cs.LG, cs.CV, eess.IV, stat.ML)
Factorized Higher-Order CNNs with an Application to Spatio-Temporal Emotion Estimation
Authors: Jean Kossaifi, Antoine Toisoul, Adrian Bulat, Yannis Panagakis, Timothy Hospedales, Maja Pantic
Abstract: Training deep neural networks with spatio-temporal (i.e., 3D) or higher-order multidimensional convolutions is computationally challenging due to millions of unknown parameters across dozens of layers. To alleviate this, one approach is to apply low-rank tensor decompositions to convolution kernels in order to compress the network and reduce its number of parameters. Alternatively, new convolutional blocks, such as MobileNet, can be directly designed for efficiency. In this paper, we unify these two approaches by proposing a tensor factorization framework for efficient, separable higher-order convolutions. Interestingly, the proposed framework enables a novel higher-order transduction: a network can be trained on a given domain (e.g., 2D images or N-dimensional data in general) and then transduced to generalize to higher-order data such as videos (or (N+K)-dimensional data in general), capturing for instance temporal dynamics while preserving the learnt spatial information.
We apply the proposed methodology, coined CP-Higher-Order Convolution (HO-CPConv), to spatio-temporal facial emotion analysis. Most existing facial affect models focus on static imagery and discard all temporal information. This is due to the above-mentioned burden of training 3D convolutional nets and the lack of large bodies of video data annotated by experts. We address both issues with our proposed framework. Training is first done on static imagery before using transduction to generalize to the temporal domain. We demonstrate superior performance on three challenging large-scale affect estimation datasets: AffectNet, SEWA, and AFEW-VA.
Submitted 31 March, 2020; v1 submitted 14 June, 2019; originally announced June 2019.
Comments: IEEE CVPR 2020
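A CP decomposition of a spatio-temporal kernel turns one joint 3D convolution into a pointwise projection followed by cheap one-dimensional convolutions along each mode. A minimal sketch under that assumption (the actual HO-CPConv layer may differ in ordering and normalization):

import torch.nn as nn

class CPConv3dSketch(nn.Module):
    """Illustrative CP-factorized spatio-temporal convolution."""
    def __init__(self, c_in, c_out, rank=16, k=3):
        super().__init__()
        p = k // 2
        self.to_rank = nn.Conv3d(c_in, rank, kernel_size=1)  # mix channels into rank space
        self.conv_t = nn.Conv3d(rank, rank, (k, 1, 1), padding=(p, 0, 0), groups=rank)  # temporal mode
        self.conv_h = nn.Conv3d(rank, rank, (1, k, 1), padding=(0, p, 0), groups=rank)  # height mode
        self.conv_w = nn.Conv3d(rank, rank, (1, 1, k), padding=(0, 0, p), groups=rank)  # width mode
        self.to_out = nn.Conv3d(rank, c_out, kernel_size=1)

    def forward(self, x):  # x: (batch, c_in, time, height, width)
        return self.to_out(self.conv_w(self.conv_h(self.conv_t(self.to_rank(x)))))

Under this factorization, transduction to video amounts to inserting the extra temporal factor into a network whose spatial factors were trained on images.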
arXiv:1904.04772 [pdf, other] (cs.CV, cs.LG)
Adversarial Learning of Disentangled and Generalizable Representations for Visual Attributes
Authors: James Oldfield, Yannis Panagakis, Mihalis A. Nicolaou
Abstract: Recently, a multitude of methods for image-to-image translation have demonstrated impressive results on problems such as multi-domain or multi-attribute transfer. The vast majority of such works leverages the strengths of adversarial learning and deep convolutional autoencoders to achieve realistic results by capturing the target data distribution well. Nevertheless, the most prominent representatives of this class of methods do not facilitate semantic structure in the latent space, and usually rely on binary domain labels for test-time transfer. This leads to rigid models, unable to capture the variance of each domain label. In this light, we propose a novel adversarial learning method that (i) facilitates the emergence of latent structure by semantically disentangling sources of variation, and (ii) encourages learning generalizable, continuous, and transferable latent codes that enable flexible attribute mixing. This is achieved by introducing a novel loss function that encourages representations to result in uniformly distributed class posteriors for disentangled attributes. In tandem with an algorithm for inducing generalizable properties, the resulting representations can be utilized for a variety of tasks such as intensity-preserving multi-attribute image translation and synthesis, without requiring labelled test data. We demonstrate the merits of the proposed method by a set of qualitative and quantitative experiments on popular databases such as MultiPIE, RaFD, and BU-3DFE, where our method outperforms other state-of-the-art methods in tasks such as intensity-preserving multi-attribute transfer and synthesis.
Submitted 30 January, 2021; v1 submitted 9 April, 2019; originally announced April 2019.
arXiv:1902.10758 [pdf, other] (cs.LG, stat.ML)
Tensor Dropout for Robust Learning
Authors: Arinbjörn Kolbeinsson, Jean Kossaifi, Yannis Panagakis, Adrian Bulat, Anima Anandkumar, Ioanna Tzoulaki, Paul Matthews
Abstract: CNNs achieve remarkable performance by leveraging deep, over-parametrized architectures, trained on large datasets. However, they have limited generalization ability to data outside the training domain, and a lack of robustness to noise and adversarial attacks. By building better inductive biases, we can improve robustness and also obtain smaller networks that are more memory and computationally efficient. While standard CNNs use matrix computations, we study tensor layers that involve higher-order computations and provide better inductive bias. Specifically, we impose low-rank tensor structures on the weights of tensor regression layers to obtain compact networks, and propose tensor dropout, a randomization in the tensor rank for robustness. We show that our approach outperforms other methods for large-scale image classification on ImageNet and CIFAR-100. We establish a new state-of-the-art accuracy for phenotypic trait prediction on the largest dataset of brain MRI, the UK Biobank brain MRI dataset, where multi-linear structure is paramount. In all cases, we demonstrate superior performance and significantly improved robustness, both to noisy inputs and to adversarial attacks.
We rigorously establish the theoretical validity of our approach by linking our randomized decomposition to non-linear dropout.
Submitted 11 December, 2020; v1 submitted 27 February, 2019; originally announced February 2019.
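Tensor dropout, as summarized above, randomizes over the tensor rank rather than over individual units. A minimal sketch for a CP-factorized weight (the function name and dropout-style rescaling are assumptions, not the paper's exact scheme):

import torch

def cp_tensor_dropout(factors, keep_prob=0.9, training=True):
    """Randomly drop rank-1 components of a CP-factorized weight tensor.
    `factors` is a list of matrices, each of shape (mode_dim, rank)."""
    if not training or keep_prob >= 1.0:
        return factors
    rank = factors[0].shape[1]
    # Bernoulli mask over rank-1 components, rescaled as in standard dropout
    mask = (torch.rand(rank) < keep_prob).float() / keep_prob
    # Masking the columns of one factor removes the corresponding rank-1 terms
    return [factors[0] * mask] + list(factors[1:])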
arXiv:1901.02839 [pdf, other] (cs.HC, cs.AI, cs.CV) doi: 10.1109/TPAMI.2019.2944808
SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild
Authors: Jean Kossaifi, Robert Walecki, Yannis Panagakis, Jie Shen, Maximilian Schmitt, Fabien Ringeval, Jing Han, Vedhas Pandit, Antoine Toisoul, Bjorn Schuller, Kam Star, Elnar Hajiyev, Maja Pantic
Abstract: Natural human-computer interaction and audio-visual human behaviour sensing systems that achieve robust performance in-the-wild are needed more than ever, as digital devices are increasingly becoming an indispensable part of our life. Accurately annotated real-world data are the crux in devising such systems. However, existing databases usually consider controlled settings, low demographic variability, and a single task. In this paper, we introduce the SEWA database of more than 2000 minutes of audio-visual data of 398 people coming from six cultures, 50% female, and uniformly spanning the age range of 18 to 65 years old. Subjects were recorded in two different contexts: while watching adverts and while discussing adverts in a video chat. The database includes rich annotations of the recordings in terms of facial landmarks, facial action units (FAU), various vocalisations, mirroring, and continuously valued valence, arousal, liking, agreement, and prototypic examples of (dis)liking. This database aims to be an extremely valuable resource for researchers in affective computing and automatic human sensing and is expected to push forward the research in human behaviour analysis, including cultural studies. Along with the database, we provide extensive baseline experiments for automatic FAU detection and automatic valence, arousal and (dis)liking intensity estimation.
Submitted 18 November, 2019; v1 submitted 9 January, 2019; originally announced January 2019.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2019 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.04636">arXiv:1802.04636</a> <span> [<a href="https://arxiv.org/pdf/1802.04636">pdf</a>, <a href="https://arxiv.org/format/1802.04636">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Modeling of Facial Aging and Kinship: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Georgopoulos%2C+M">Markos Georgopoulos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Pantic%2C+M">Maja Pantic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.04636v2-abstract-short" style="display: inline;"> Computational facial models that capture properties of facial cues related to aging and kinship increasingly attract the attention of the research community, enabling the development of reliable methods for age progression, age estimation, age-invariant facial characterization, and kinship verification from visual data. In this paper, we review recent advances in modeling of facial aging and kinsh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.04636v2-abstract-full').style.display = 'inline'; document.getElementById('1802.04636v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.04636v2-abstract-full" style="display: none;"> Computational facial models that capture properties of facial cues related to aging and kinship increasingly attract the attention of the research community, enabling the development of reliable methods for age progression, age estimation, age-invariant facial characterization, and kinship verification from visual data. In this paper, we review recent advances in modeling of facial aging and kinship. In particular, we provide an up-to date, complete list of available annotated datasets and an in-depth analysis of geometric, hand-crafted, and learned facial representations that are used for facial aging and kinship characterization. Moreover, evaluation protocols and metrics are reviewed and notable experimental results for each surveyed task are analyzed. This survey allows us to identify challenges and discuss future research directions for the development of robust facial models in real-world conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.04636v2-abstract-full').style.display = 'none'; document.getElementById('1802.04636v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. 
arXiv:1801.07580 [pdf, other] (cs.CV)
Side Information for Face Completion: a Robust PCA Approach
Authors: Niannan Xue, Jiankang Deng, Shiyang Cheng, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Robust principal component analysis (RPCA) is a powerful method for learning low-rank feature representations of various visual data. However, for certain types, as well as significant amounts, of error corruption, it fails to yield satisfactory results; a drawback that can be alleviated by exploiting domain-dependent prior knowledge or information. In this paper, we propose two models for the RPCA that take into account such side information, even in the presence of missing values. We apply this framework to the task of UV completion, which is widely used in pose-invariant face recognition. Moreover, we construct a generative adversarial network (GAN) to extract side information as well as subspaces. These subspaces not only assist in the recovery but also speed up the process in case of large-scale data. We quantitatively and qualitatively evaluate the proposed approaches through both synthetic data and five real-world datasets to verify their effectiveness.
Submitted 19 January, 2018; originally announced January 2018.
Comments: arXiv admin note: text overlap with arXiv:1702.00648
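For reference, the plain RPCA decomposition that the two proposed models extend splits a data matrix into a low-rank part plus a sparse error part, typically via alternating proximal steps. A minimal baseline sketch (without the paper's side-information terms; parameter choices are conventional defaults, not theirs):

import numpy as np

def rpca_baseline(X, mu=1.0, n_iter=200):
    """Decompose X into low-rank L and sparse S via an inexact augmented Lagrangian."""
    m, n = X.shape
    lam = 1.0 / np.sqrt(max(m, n))  # standard sparsity weight
    L, S, Y = (np.zeros_like(X) for _ in range(3))
    for _ in range(n_iter):
        # Low-rank step: singular value thresholding
        U, s, Vt = np.linalg.svd(X - S + Y / mu, full_matrices=False)
        L = (U * np.maximum(s - 1.0 / mu, 0.0)) @ Vt
        # Sparse step: entrywise soft thresholding
        R = X - L + Y / mu
        S = np.sign(R) * np.maximum(np.abs(R) - lam / mu, 0.0)
        Y += mu * (X - L - S)  # dual update
    return L, S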
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:1702.00648</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.06665">arXiv:1801.06665</a> <span> [<a href="https://arxiv.org/pdf/1801.06665">pdf</a>, <a href="https://arxiv.org/format/1801.06665">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Visual Data Augmentation through Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chrysos%2C+G+G">Grigorios G. Chrysos</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1801.06665v1-abstract-short" style="display: inline;"> The rapid progress in machine learning methods has been empowered by i) huge datasets that have been collected and annotated, ii) improved engineering (e.g. data pre-processing/normalization). The existing datasets typically include several million samples, which constitutes their extension a colossal task. In addition, the state-of-the-art data-driven methods demand a vast amount of data, hence a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.06665v1-abstract-full').style.display = 'inline'; document.getElementById('1801.06665v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1801.06665v1-abstract-full" style="display: none;"> The rapid progress in machine learning methods has been empowered by i) huge datasets that have been collected and annotated, ii) improved engineering (e.g. data pre-processing/normalization). The existing datasets typically include several million samples, which constitutes their extension a colossal task. In addition, the state-of-the-art data-driven methods demand a vast amount of data, hence a standard engineering trick employed is artificial data augmentation for instance by adding into the data cropped and (affinely) transformed images. However, this approach does not correspond to any change in the natural 3D scene. We propose instead to perform data augmentation through learning realistic local transformations. We learn a forward and an inverse transformation that maps an image from the high-dimensional space of pixel intensities to a latent space which varies (approximately) linearly with the latent space of a realistically transformed version of the image. Such transformed images can be considered two successive frames in a video. Next, we utilize these transformations to learn a linear model that modifies the latent spaces and then use the inverse transformation to synthesize a new image. We argue that the this procedure produces powerful invariant representations. 
We perform both qualitative and quantitative experiments that demonstrate that our proposed method creates new realistic images.
Submitted 20 January, 2018; originally announced January 2018.

arXiv:1801.06432 [pdf, other] (stat.ML, cs.CV, cs.LG)
Robust Kronecker Component Analysis
Authors: Mehdi Bahri, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Dictionary learning and component analysis models are fundamental for learning compact representations that are relevant to a given task (feature extraction, dimensionality reduction, denoising, etc.). The model complexity is encoded by means of specific structure, such as sparsity, low-rankness, or nonnegativity. Unfortunately, approaches like K-SVD - that learn dictionaries for sparse coding via Singular Value Decomposition (SVD) - are hard to scale to high-volume and high-dimensional visual data, and fragile in the presence of outliers. Conversely, robust component analysis methods such as the Robust Principal Component Analysis (RPCA) are able to recover low-complexity (e.g., low-rank) representations from data corrupted with noise of unknown magnitude and support, but do not provide a dictionary that respects the structure of the data (e.g., images), and also involve expensive computations.
In this paper, we propose a novel Kronecker-decomposable component analysis model, coined Robust Kronecker Component Analysis (RKCA), that combines ideas from sparse dictionary learning and robust component analysis. RKCA has several appealing properties, including robustness to gross corruption; it can be used for low-rank modeling, and leverages separability to solve significantly smaller problems. We design an efficient learning algorithm by drawing links with a restricted form of tensor factorization, and analyze its optimality and low-rankness properties. The effectiveness of the proposed approach is demonstrated on real-world applications, namely background subtraction and image denoising and completion, by performing a thorough comparison with the current state of the art.
Submitted 10 November, 2018; v1 submitted 18 January, 2018; originally announced January 2018.
Comments: In IEEE Transactions on Pattern Analysis and Machine Intelligence, Special Issue on Compact and Efficient Feature Representation and Learning in Computer Vision, 2018. Contains appendices. arXiv admin note: text overlap with arXiv:1703.07886
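The separability that RKCA leverages comes from the Kronecker identity (A kron B) vec(R) = vec(B R A^T), which lets one work with two small factor matrices instead of materializing the large dictionary. A small illustrative helper under that identity (not from the paper's code; names are assumptions):

import numpy as np

def kron_dict_apply(A, B, R):
    """Compute (A kron B) @ R column by column without forming A kron B.
    A: (m_a, n_a), B: (m_b, n_b), R: (n_a * n_b, k), column-major vec convention."""
    m_a, n_a = A.shape
    m_b, n_b = B.shape
    out = np.empty((m_a * m_b, R.shape[1]))
    for j in range(R.shape[1]):
        Rj = R[:, j].reshape(n_b, n_a, order="F")  # un-vectorize the code vector
        out[:, j] = (B @ Rj @ A.T).reshape(-1, order="F")  # vec(B Rj A^T)
    return out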
arXiv:1712.05799 [cs.CV] (https://arxiv.org/abs/1712.05799)
DOI: 10.1109/JSTSP.2018.2877108 (https://doi.org/10.1109/JSTSP.2018.2877108)
Multi-Attribute Robust Component Analysis for Facial UV Maps
Authors: Stylianos Moschoglou, Evangelos Ververas, Yannis Panagakis, Mihalis Nicolaou, Stefanos Zafeiriou
Abstract: Recently, owing to the collection of large-scale 3D face models as well as the advent of deep learning, significant progress has been made in the field of 3D face alignment "in-the-wild". That is, many methods have been proposed that establish sparse or dense 3D correspondences between a 2D facial image and a 3D face model. The utilization of 3D face alignment introduces new challenges and research directions, especially for the analysis of facial texture images. In particular, texture no longer suffers from the warping effects that occurred when 2D face alignment methods were used. Nevertheless, since facial images are commonly captured under arbitrary recording conditions, a considerable amount of missing information and gross outliers is observed (e.g., due to self-occlusion, or subjects wearing eyeglasses). Given that many annotated databases have been developed for face analysis tasks, it is evident that component analysis techniques need to be developed to alleviate the issues arising from these challenges. In this paper, we propose a novel component analysis technique that is suitable for facial UV maps containing a considerable amount of missing information and outliers, and that additionally incorporates knowledge from various attributes (such as age and identity). We evaluate the proposed Multi-Attribute Robust Component Analysis (MA-RCA) on problems such as UV completion and age progression, where the proposed method outperforms the compared techniques. Finally, we demonstrate that MA-RCA is powerful enough to provide weak annotations for training deep learning systems for various applications, such as illumination transfer.
Submitted 15 December, 2017; originally announced December 2017.
arXiv:1712.00684 [cs.CV] (https://arxiv.org/abs/1712.00684)
GAGAN: Geometry-Aware Generative Adversarial Networks
Authors: Jean Kossaifi, Linh Tran, Yannis Panagakis, Maja Pantic
Abstract: Deep generative models learned through adversarial training have become increasingly popular for their ability to generate naturalistic image textures. However, aside from texture, the visual appearance of objects is significantly influenced by their shape geometry, information which is not taken into account by existing generative models. This paper introduces Geometry-Aware Generative Adversarial Networks (GAGAN) for incorporating geometric information into the image generation process. Specifically, in GAGAN the generator samples latent variables from the probability space of a statistical shape model. By mapping the output of the generator to a canonical coordinate frame through a differentiable geometric transformation, we enforce the geometry of the objects and add an implicit connection from the prior to the generated object. Experimental results on face generation indicate that GAGAN can generate realistic images of faces with arbitrary facial attributes, such as expression, pose, and morphology, that are of better quality than those produced by current GAN-based methods. Our method can be used to augment any existing GAN architecture and improve the quality of the generated images.
Submitted 27 March, 2018; v1 submitted 2 December, 2017; originally announced December 2017.
arXiv:1711.10402 [cs.CV] (https://arxiv.org/abs/1711.10402)
DOI: 10.1007/s11263-019-01163-7 (https://doi.org/10.1007/s11263-019-01163-7)
An Adversarial Neuro-Tensorial Approach For Learning Disentangled Representations
Authors: Mengjiao Wang, Zhixin Shu, Shiyang Cheng, Yannis Panagakis, Dimitris Samaras, Stefanos Zafeiriou
Abstract: Several factors contribute to the appearance of an object in a visual scene, including pose, illumination, and deformation, among others. Each factor accounts for a source of variability in the data, while the multiplicative interactions of these factors emulate the entangled variability, giving rise to the rich structure of visual object appearance. Disentangling such unobserved factors from visual data is a challenging task, especially when the data have been captured in uncontrolled recording conditions (also referred to as "in-the-wild") and label information is not available. In this paper, we propose the first unsupervised deep learning method (with pseudo-supervision) for disentangling multiple latent factors of variation in face images captured in-the-wild. To this end, we propose a deep latent variable model, where the multiplicative interactions of multiple latent factors of variation are explicitly modelled by means of multilinear (tensor) structure. We demonstrate that the proposed approach indeed learns disentangled representations of facial expressions and pose, which can be used in various applications, including face editing, as well as 3D face reconstruction and classification of facial expression, identity and pose.
Submitted 24 February, 2018; v1 submitted 28 November, 2017; originally announced November 2017.
Journal ref: International Journal of Computer Vision, 2019
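One way to make "multilinear (tensor) structure" concrete is a Tucker-style decoder in which each factor of variation contributes one mode. The display below is our schematic reading of the abstract, not the paper's exact model:

$$\mathbf{x} \;\approx\; \mathcal{W} \times_2 \mathbf{z}^{(\text{id})} \times_3 \mathbf{z}^{(\text{exp})} \times_4 \mathbf{z}^{(\text{pose})} \times_5 \mathbf{z}^{(\text{illum})},$$

where $\mathcal{W}$ is a learnable core tensor whose first mode spans the output (pixels or decoder features) and each mode-$n$ product contracts one latent factor, so varying a single $\mathbf{z}^{(\cdot)}$ while holding the others fixed edits one source of variability.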
arXiv:1709.04836 [stat.ML, cs.CV, cs.LG] (https://arxiv.org/abs/1709.04836)
Informed Non-convex Robust Principal Component Analysis with Features
Authors: Niannan Xue, Jiankang Deng, Yannis Panagakis, Stefanos Zafeiriou
Abstract: We revisit the problem of robust principal component analysis with features acting as prior side information. To this end, a novel non-convex optimization approach is proposed to decompose a given observation matrix into a low-rank core and the corresponding sparse residual. Rigorous theoretical analysis of the proposed algorithm results in exact recovery guarantees with low computational complexity. Aptly designed synthetic experiments demonstrate that our method is the first to wholly harness the power of non-convexity over convexity in terms of both recoverability and speed. That is, the proposed non-convex approach is more accurate and faster than the best available algorithms for the problem under study. Two real-world applications, namely image classification and face denoising, further exemplify the practical superiority of the proposed method.
Submitted 14 September, 2017; originally announced September 2017.
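For readers unfamiliar with the underlying low-rank-plus-sparse decomposition, the sketch below implements plain convex RPCA (Principal Component Pursuit) with a standard ADMM/singular-value-thresholding loop. It is a baseline illustration only: it omits the feature (side-information) matrices and the non-convex acceleration that distinguish the paper's method, and uses the common default weight $\lambda = 1/\sqrt{\max(m,n)}$:

    import numpy as np

    def shrink(X, tau):
        """Elementwise soft-thresholding (proximal operator of the l1 norm)."""
        return np.sign(X) * np.maximum(np.abs(X) - tau, 0.0)

    def svt(X, tau):
        """Singular-value thresholding (proximal operator of the nuclear norm)."""
        U, s, Vt = np.linalg.svd(X, full_matrices=False)
        return U @ np.diag(shrink(s, tau)) @ Vt

    def rpca_pcp(M, max_iter=500, tol=1e-7):
        """Decompose M into low-rank L plus sparse S by solving
        min ||L||_* + lam * ||S||_1  s.t.  M = L + S  (inexact ALM/ADMM)."""
        m, n = M.shape
        lam = 1.0 / np.sqrt(max(m, n))
        mu = m * n / (4.0 * np.abs(M).sum())   # common heuristic penalty parameter
        L = np.zeros_like(M); S = np.zeros_like(M); Y = np.zeros_like(M)
        for _ in range(max_iter):
            L = svt(M - S + Y / mu, 1.0 / mu)
            S = shrink(M - L + Y / mu, lam / mu)
            residual = M - L - S
            Y += mu * residual
            if np.linalg.norm(residual) <= tol * np.linalg.norm(M):
                break
        return L, S

    # Toy check: low-rank matrix plus sparse gross corruption.
    rng = np.random.default_rng(0)
    L0 = rng.standard_normal((100, 5)) @ rng.standard_normal((5, 100))
    S0 = np.zeros((100, 100))
    idx = rng.random((100, 100)) < 0.05
    S0[idx] = 10.0 * rng.standard_normal(idx.sum())
    L, S = rpca_pcp(L0 + S0)
    print(np.linalg.norm(L - L0) / np.linalg.norm(L0))  # small relative error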
arXiv:1703.07886 [stat.ML, cs.CV] (https://arxiv.org/abs/1703.07886)
Robust Kronecker-Decomposable Component Analysis for Low-Rank Modeling
Authors: Mehdi Bahri, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Dictionary learning and component analysis are part of one of the most well-studied and active research fields, at the intersection of signal and image processing, computer vision, and statistical machine learning. In dictionary learning, the current methods of choice are arguably K-SVD and its variants, which learn a dictionary (i.e., a decomposition) for sparse coding via the Singular Value Decomposition. In robust component analysis, leading methods derive from Principal Component Pursuit (PCP), which recovers a low-rank matrix from sparse corruptions of unknown magnitude and support. However, K-SVD is sensitive to the presence of noise and outliers in the training set. Additionally, PCP does not provide a dictionary that respects the structure of the data (e.g., images), and requires expensive SVD computations when solved by convex relaxation. In this paper, we introduce a new robust decomposition of images by combining ideas from sparse dictionary learning and PCP. We propose a novel Kronecker-decomposable component analysis which is robust to gross corruption, can be used for low-rank modeling, and leverages separability to solve significantly smaller problems. We design an efficient learning algorithm by drawing links with a restricted form of tensor factorization. The effectiveness of the proposed approach is demonstrated on real-world applications, namely background subtraction and image denoising, by performing a thorough comparison with the current state of the art.
Submitted 26 July, 2017; v1 submitted 22 March, 2017; originally announced March 2017.
Comments: Accepted for publication at ICCV 2017
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1702.00648v2-abstract-full').style.display = 'none'; document.getElementById('1702.00648v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 February, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1701.05360">arXiv:1701.05360</a> <span> [<a href="https://arxiv.org/pdf/1701.05360">pdf</a>, <a href="https://arxiv.org/format/1701.05360">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> 3D Face Morphable Models "In-the-Wild" </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Booth%2C+J">James Booth</a>, <a href="/search/cs?searchtype=author&query=Antonakos%2C+E">Epameinondas Antonakos</a>, <a href="/search/cs?searchtype=author&query=Ploumpis%2C+S">Stylianos Ploumpis</a>, <a href="/search/cs?searchtype=author&query=Trigeorgis%2C+G">George Trigeorgis</a>, <a href="/search/cs?searchtype=author&query=Panagakis%2C+Y">Yannis Panagakis</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1701.05360v1-abstract-short" style="display: inline;"> 3D Morphable Models (3DMMs) are powerful statistical models of 3D facial shape and texture, and among the state-of-the-art methods for reconstructing facial shape from single images. With the advent of new 3D sensors, many 3D facial datasets have been collected containing both neutral as well as expressive faces. However, all datasets are captured under controlled conditions. Thus, even though pow… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1701.05360v1-abstract-full').style.display = 'inline'; document.getElementById('1701.05360v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1701.05360v1-abstract-full" style="display: none;"> 3D Morphable Models (3DMMs) are powerful statistical models of 3D facial shape and texture, and among the state-of-the-art methods for reconstructing facial shape from single images. With the advent of new 3D sensors, many 3D facial datasets have been collected containing both neutral as well as expressive faces. However, all datasets are captured under controlled conditions. Thus, even though powerful 3D facial shape models can be learnt from such data, it is difficult to build statistical texture models that are sufficient to reconstruct faces captured in unconstrained conditions ("in-the-wild"). In this paper, we propose the first, to the best of our knowledge, "in-the-wild" 3DMM by combining a powerful statistical model of facial shape, which describes both identity and expression, with an "in-the-wild" texture model. 
arXiv:1701.05360 [cs.CV] (https://arxiv.org/abs/1701.05360)
3D Face Morphable Models "In-the-Wild"
Authors: James Booth, Epameinondas Antonakos, Stylianos Ploumpis, George Trigeorgis, Yannis Panagakis, Stefanos Zafeiriou
Abstract: 3D Morphable Models (3DMMs) are powerful statistical models of 3D facial shape and texture, and are among the state-of-the-art methods for reconstructing facial shape from single images. With the advent of new 3D sensors, many 3D facial datasets have been collected, containing both neutral and expressive faces. However, all of these datasets are captured under controlled conditions. Thus, even though powerful 3D facial shape models can be learnt from such data, it is difficult to build statistical texture models that are sufficient to reconstruct faces captured in unconstrained conditions ("in-the-wild"). In this paper, we propose the first, to the best of our knowledge, "in-the-wild" 3DMM by combining a powerful statistical model of facial shape, which describes both identity and expression, with an "in-the-wild" texture model. We show that employing such an "in-the-wild" texture model greatly simplifies the fitting procedure, because there is no need to optimize with respect to the illumination parameters. Furthermore, we propose a new fast algorithm for fitting the 3DMM to arbitrary images. Finally, we have captured the first 3D facial database with relatively unconstrained conditions and report quantitative evaluations with state-of-the-art performance. Complementary qualitative reconstruction results are demonstrated on standard "in-the-wild" facial databases. An open-source implementation of our technique is released as part of the Menpo Project.
Submitted 19 January, 2017; originally announced January 2017.
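Schematically, the fitting problem described above reduces to an energy over shape and camera parameters plus texture-model coefficients, with no lighting variables, because the in-the-wild texture model absorbs illumination. This is our generic rendering of a 3DMM fitting objective, not the paper's exact cost:

$$\min_{\mathbf{p},\,\mathbf{c},\,\boldsymbol{\lambda}} \;\big\|\mathbf{F}\big(\mathcal{W}(\mathbf{p},\mathbf{c})\big) - \bar{\mathbf{t}} - \mathbf{U}_t\boldsymbol{\lambda}\big\|^{2} + w_s\|\mathbf{p}\|^{2}_{\boldsymbol{\Sigma}_s^{-1}} + w_t\|\boldsymbol{\lambda}\|^{2}_{\boldsymbol{\Sigma}_t^{-1}},$$

where $\mathbf{p}$ collects shape (identity and expression) parameters, $\mathbf{c}$ camera parameters, $\mathcal{W}(\mathbf{p},\mathbf{c})$ samples the image at the projected model vertices, $\mathbf{F}$ extracts the in-the-wild features, and $\bar{\mathbf{t}} + \mathbf{U}_t\boldsymbol{\lambda}$ is the learned texture model.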
arXiv:1612.00750 [cs.SI, stat.ML] (https://arxiv.org/abs/1612.00750)
Non-Negative Matrix Factorizations for Multiplex Network Analysis
Authors: Vladimir Gligorijevic, Yannis Panagakis, Stefanos Zafeiriou
Abstract: Networks have been a general tool for representing, analyzing, and modeling relational data arising in several domains. One of the most important aspects of network analysis is community detection, or network clustering. Until recently, the major focus has been on discovering community structure in single (i.e., monoplex) networks. However, with the advent of relational data with multiple modalities, multiplex networks, i.e., networks composed of multiple layers representing different aspects of relations, have emerged. Consequently, community detection in multiplex networks, i.e., detecting clusters of nodes shared by all layers, has become a new challenge. In this paper, we propose Network Fusion for Composite Community Extraction (NF-CCE), a new class of algorithms, based on four different non-negative matrix factorization models, capable of extracting composite communities in multiplex networks. Each algorithm works in two steps: first, it finds a non-negative, low-dimensional feature representation of each network layer; then, it fuses the layers' feature representations into a common non-negative, low-dimensional feature representation via collective factorization. The composite clusters are extracted from this common feature representation. We demonstrate the superior performance of our algorithms over state-of-the-art methods on various types of multiplex networks, including biological, social, economic, citation, phone-communication, and brain multiplex networks.
Submitted 25 January, 2017; v1 submitted 30 November, 2016; originally announced December 2016.
Comments: 12 pages, 4 figures, 3 tables
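A minimal sketch of the two-step recipe, assuming scikit-learn's generic NMF stands in for the paper's factorization models and the collective-factorization step is simplified to a second factorization of the stacked per-layer representations (the paper's fusion is more sophisticated):

    import numpy as np
    from sklearn.decomposition import NMF
    from sklearn.cluster import KMeans

    rng = np.random.default_rng(0)
    k = 3                                   # number of composite communities
    # Toy multiplex network: 3 layers over the same 60 nodes (symmetric adjacency).
    layers = []
    for _ in range(3):
        A = (rng.random((60, 60)) < 0.08).astype(float)
        layers.append(np.maximum(A, A.T))

    # Step 1: a non-negative low-dimensional representation per layer.
    H_per_layer = [NMF(n_components=k, init="nndsvda", max_iter=500)
                   .fit_transform(A) for A in layers]

    # Step 2 (simplified fusion): factorize the horizontally stacked per-layer
    # representations to obtain one common non-negative embedding.
    stacked = np.hstack(H_per_layer)
    H_common = NMF(n_components=k, init="nndsvda", max_iter=500).fit_transform(stacked)

    # Composite communities are read off the common representation.
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(H_common)
    print(labels[:10])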
arXiv:1610.09555 [cs.LG] (https://arxiv.org/abs/1610.09555)
TensorLy: Tensor Learning in Python
Authors: Jean Kossaifi, Yannis Panagakis, Anima Anandkumar, Maja Pantic
Abstract: Tensors are higher-order extensions of matrices. While matrix methods form the cornerstone of machine learning and data analysis, tensor methods have been gaining increasing traction. However, software support for tensor operations is not on the same footing. In order to bridge this gap, we have developed TensorLy, a high-level API for tensor methods and deep tensorized neural networks in Python. TensorLy aims to follow the same standards adopted by the main projects of the Python scientific community, and integrates seamlessly with them. Its BSD license makes it suitable for both academic and commercial applications. TensorLy's backend system allows users to perform computations with NumPy, MXNet, PyTorch, TensorFlow, and CuPy, and to scale them across multiple CPU or GPU machines. In addition, using a deep-learning framework as the backend allows users to easily design and train deep tensorized neural networks. TensorLy is available at https://github.com/tensorly/tensorly
Submitted 9 May, 2018; v1 submitted 29 October, 2016; originally announced October 2016.
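To illustrate the API, here is a short example that sets a backend and computes a rank-5 CP (PARAFAC) decomposition. The call names match recent TensorLy releases (roughly 0.6 and later, where decompositions return a weights/factors pair); exact signatures may differ across versions:

    import numpy as np
    import tensorly as tl
    from tensorly.decomposition import parafac

    tl.set_backend("numpy")          # could also be "pytorch", "tensorflow", ...

    # A random third-order tensor.
    X = tl.tensor(np.random.default_rng(0).random((10, 12, 8)))

    # Rank-5 CP/PARAFAC decomposition: X ~ sum_r w_r * a_r o b_r o c_r.
    cp = parafac(X, rank=5)          # CPTensor: (weights, [A, B, C])

    # Reconstruct and measure the relative approximation error.
    X_hat = tl.cp_to_tensor(cp)
    err = tl.norm(X - X_hat) / tl.norm(X)
    print(f"relative reconstruction error: {err:.3f}")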
arXiv:1502.00852 [cs.CV] (https://arxiv.org/abs/1502.00852)
Face frontalization for Alignment and Recognition
Authors: Christos Sagonas, Yannis Panagakis, Stefanos Zafeiriou, Maja Pantic
Abstract: Recently, it was shown that excellent results can be achieved in both face landmark localization and pose-invariant face recognition. These breakthroughs are attributed to the efforts of the community to manually annotate facial images in many different poses and to collect 3D face data. In this paper, we propose a novel method for joint face landmark localization and frontal face reconstruction (pose correction) that uses only a small set of frontal images. Observing that, among all poses, the frontal facial image is the one of minimum rank, we formulate a model that jointly recovers the facial landmarks and the frontalized version of the face. To this end, a suitable optimization problem, involving the minimization of the nuclear norm and the matrix $\ell_1$ norm, is solved. The proposed method is assessed on frontal face reconstruction (pose correction), face landmark localization, and pose-invariant face recognition and verification, through experiments on 6 facial image databases. The experimental results demonstrate the effectiveness of the proposed method.
Submitted 3 February, 2015; originally announced February 2015.
Comments: 8 pages, 8 figures
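The nuclear-norm-plus-$\ell_1$ objective mentioned in the abstract can be sketched as follows, in our schematic notation ($\mathcal{D}(\mathbf{I};\boldsymbol{\Delta})$ denotes the image warped according to landmarks $\boldsymbol{\Delta}$, $\mathbf{L}$ the frontalized low-rank part, $\mathbf{E}$ the sparse error); the paper's exact constraints may differ:

$$\min_{\mathbf{L},\,\mathbf{E},\,\boldsymbol{\Delta}} \;\|\mathbf{L}\|_{*} + \lambda\|\mathbf{E}\|_{1} \quad \text{s.t.} \quad \mathcal{D}(\mathbf{I};\boldsymbol{\Delta}) = \mathbf{L} + \mathbf{E},$$

so driving the warped face toward a minimum-rank $\mathbf{L}$ simultaneously corrects the pose and refines the landmark estimates $\boldsymbol{\Delta}$.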
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 
9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>