Search | arXiv e-print repository
Showing 1–41 of 41 results for author: Hui, Z
Searching in archive cs.

1. arXiv:2502.06802 [pdf, other] | cs.IR, cs.AI, cs.CL, cs.LG
   Solving the Content Gap in Roblox Game Recommendations: LLM-Based Profile Generation and Reranking
   Authors: Chen Wang, Xiaokai Wei, Yexi Jiang, Frank Ong, Kevin Gao, Xiao Yu, Zheng Hui, Se-eun Yoon, Philip Yu, Michelle Gong
   Abstract: With the vast and dynamic user-generated content on Roblox, creating effective game recommendations requires a deep understanding of game content. Traditional recommendation models struggle with the inconsistent and sparse nature of game text features such as titles and descriptions. Recent advancements in large language models (LLMs) offer opportunities to enhance recommendation systems by analyzing in-game text data. This paper addresses two challenges: generating high-quality, structured text features for games without extensive human annotation, and validating these features to ensure they improve recommendation relevance. We propose an approach that extracts in-game text and uses LLMs to infer attributes such as genre and gameplay objectives from raw player interactions. Additionally, we introduce an LLM-based re-ranking mechanism to assess the effectiveness of the generated text features, enhancing personalization and user satisfaction. Beyond recommendations, our approach supports applications such as user engagement-based integrity detection, already deployed in production. This scalable framework demonstrates the potential of in-game text understanding to improve recommendation quality on Roblox and adapt recommendations to its unique, user-generated ecosystem.
   Submitted 1 February, 2025; originally announced February 2025.
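   The re-ranking step is only described at a high level in this abstract, so the following is a generic sketch of LLM-based candidate re-ranking rather than the paper's implementation; the prompt, 0-10 scoring scale, and the stand-in `fake_llm` client are assumptions.

   ```python
   # Generic LLM re-ranking sketch (not the paper's code). `fake_llm` stands in
   # for a real chat-completion client; swap it for an actual API call in practice.
   from typing import Callable, Dict, List

   def fake_llm(prompt: str) -> str:
       """Stand-in LLM that always answers "5"; keeps the sketch self-contained."""
       return "5"

   def score_candidate(user_context: str, game_profile: str,
                       llm: Callable[[str], str] = fake_llm) -> float:
       """Ask the LLM for a 0-10 relevance score and parse it defensively."""
       prompt = (
           "Rate from 0 to 10 how well this game matches the player.\n"
           f"Player context: {user_context}\n"
           f"Game profile: {game_profile}\n"
           "Answer with a single number."
       )
       try:
           return float(llm(prompt).strip().split()[0])
       except (ValueError, IndexError):
           return 0.0  # unparseable output gets the lowest score

   def rerank(user_context: str, candidates: List[Dict]) -> List[Dict]:
       """Sort retrieved candidates by LLM relevance score, highest first."""
       return sorted(candidates,
                     key=lambda c: score_candidate(user_context, c["profile"]),
                     reverse=True)

   print(rerank("likes co-op obstacle courses",
                [{"id": 1, "profile": "solo horror escape room"},
                 {"id": 2, "profile": "team-based parkour obby"}]))
   ```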
2. arXiv:2502.01403 [pdf, other] | cs.CV, cs.AI, cs.CL
   AdaSVD: Adaptive Singular Value Decomposition for Large Language Models
   Authors: Zhiteng Li, Mingyuan Xia, Jingyuan Zhang, Zheng Hui, Linghe Kong, Yulun Zhang, Xiaokang Yang
   Abstract: Large language models (LLMs) have achieved remarkable success in natural language processing (NLP) tasks, yet their substantial memory requirements present significant challenges for deployment on resource-constrained devices. Singular Value Decomposition (SVD) has emerged as a promising compression technique for LLMs, offering considerable reductions in memory overhead. However, existing SVD-based methods often struggle to effectively mitigate the errors introduced by SVD truncation, leading to a noticeable performance gap when compared to the original models. Furthermore, applying a uniform compression ratio across all transformer layers fails to account for the varying importance of different layers. To address these challenges, we propose AdaSVD, an adaptive SVD-based LLM compression approach. Specifically, AdaSVD introduces adaComp, which adaptively compensates for SVD truncation errors by alternately updating the singular matrices U and V^T. Additionally, AdaSVD introduces adaCR, which adaptively assigns layer-specific compression ratios based on the relative importance of each layer. Extensive experiments across multiple LLM families and evaluation metrics demonstrate that AdaSVD consistently outperforms state-of-the-art (SOTA) SVD-based methods, achieving superior performance with significantly reduced memory requirements. The code and models will be available at https://github.com/ZHITENGLI/AdaSVD.
   Submitted 3 February, 2025; v1 submitted 3 February, 2025; originally announced February 2025.
   Comments: The code and models will be available at https://github.com/ZHITENGLI/AdaSVD
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09817">arXiv:2412.09817</a> <span> [<a href="https://arxiv.org/pdf/2412.09817">pdf</a>, <a href="https://arxiv.org/format/2412.09817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Multimodal Large Language Models Complex Reason via Similarity Computation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaofeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+F">Fanshuo Zeng</a>, <a href="/search/cs?searchtype=author&query=Quan%2C+Y">Yihao Quan</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jiawei Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09817v1-abstract-short" style="display: inline;"> Multimodal large language models have experienced rapid growth, and numerous different models have emerged. The interpretability of LVLMs remains an under-explored area. Especially when faced with more complex tasks such as chain-of-thought reasoning, its internal mechanisms still resemble a black box that is difficult to decipher. By studying the interaction and information flow between images an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09817v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09817v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09817v1-abstract-full" style="display: none;"> Multimodal large language models have experienced rapid growth, and numerous different models have emerged. The interpretability of LVLMs remains an under-explored area. Especially when faced with more complex tasks such as chain-of-thought reasoning, its internal mechanisms still resemble a black box that is difficult to decipher. By studying the interaction and information flow between images and text, we noticed that in models such as LLaVA1.5, image tokens that are semantically related to text are more likely to have information flow convergence in the LLM decoding layer, and these image tokens receive higher attention scores. However, those image tokens that are less relevant to the text do not have information flow convergence, and they only get very small attention scores. To efficiently utilize the image information, we propose a new image token reduction method, Simignore, which aims to improve the complex reasoning ability of LVLMs by computing the similarity between image and text embeddings and ignoring image tokens that are irrelevant and unimportant to the text. Through extensive experiments, we demonstrate the effectiveness of our method for complex reasoning tasks. 
The paper's source code can be accessed from \url{https://github.com/FanshuoZeng/Simignore}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09817v1-abstract-full').style.display = 'none'; document.getElementById('2412.09817v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17127">arXiv:2411.17127</a> <span> [<a href="https://arxiv.org/pdf/2411.17127">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> C sequential optimization numbers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zile Hui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17127v1-abstract-short" style="display: inline;"> This work establishes a definition that is more basic than the previous ones, for the Stirling numbers of first kind, which is a sufficient but not necessary condition for the previous definition. Based on this definition and a combinatorial problem, we discover C sequential optimization numbers, where C is a k+1-tuple vector. For C= (0,1), we prove that C sequential optimization numbers are the u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17127v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17127v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17127v1-abstract-full" style="display: none;"> This work establishes a definition that is more basic than the previous ones, for the Stirling numbers of first kind, which is a sufficient but not necessary condition for the previous definition. Based on this definition and a combinatorial problem, we discover C sequential optimization numbers, where C is a k+1-tuple vector. For C= (0,1), we prove that C sequential optimization numbers are the unsigned Stirling numbers of first kind. We can deduce the properties of C sequential optimization numbers by following the properties of the Stirling numbers of first kind and we give specific examples such as the recurrence formula and an instance of C sequential optimization numbers. We also give specific new properties such as an explicit upper bound of them. We prove the probability that the unsigned Stirling numbers of first kind are concentrated in O(logn) is nearly 100%. 
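   A hedged sketch of the core idea this abstract describes: rank image tokens by cosine similarity to a pooled text embedding and keep only the top fraction. The pooling choice, tensor shapes, and keep ratio are assumptions, not the released code.

   ```python
   # Similarity-based image-token reduction sketch (illustrative, not Simignore itself).
   import torch

   def keep_relevant_image_tokens(image_tokens: torch.Tensor,   # (n_img, d)
                                  text_tokens: torch.Tensor,    # (n_txt, d)
                                  keep_ratio: float = 0.5) -> torch.Tensor:
       """Keep the image tokens most similar to the mean text embedding."""
       text_query = text_tokens.mean(dim=0, keepdim=True)                       # (1, d)
       sims = torch.nn.functional.cosine_similarity(image_tokens, text_query)   # (n_img,)
       k = max(1, int(keep_ratio * image_tokens.shape[0]))
       keep_idx = sims.topk(k).indices.sort().values    # preserve original token order
       return image_tokens[keep_idx]

   img = torch.randn(576, 4096)   # LLaVA-1.5-like visual token count (assumption)
   txt = torch.randn(32, 4096)
   reduced = keep_relevant_image_tokens(img, txt, keep_ratio=0.25)
   print(reduced.shape)           # torch.Size([144, 4096])
   ```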
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17127v1-abstract-full').style.display = 'none'; document.getElementById('2411.17127v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15175">arXiv:2411.15175</a> <span> [<a href="https://arxiv.org/pdf/2411.15175">pdf</a>, <a href="https://arxiv.org/format/2411.15175">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ToxiLab: How Well Do Open-Source LLMs Generate Synthetic Toxicity Data? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhaoxiao Guo</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hang Zhao</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+J">Juanyong Duan</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+L">Lin Ai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yinheng Li</a>, <a href="/search/cs?searchtype=author&query=Hirschberg%2C+J">Julia Hirschberg</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Congrui Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15175v3-abstract-short" style="display: inline;"> Effective toxic content detection relies heavily on high-quality and diverse data, which serve as the foundation for robust content moderation models. Synthetic data has become a common approach for training models across various NLP tasks. However, its effectiveness remains uncertain for highly subjective tasks like hate speech detection, with previous research yielding mixed results. This study… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15175v3-abstract-full').style.display = 'inline'; document.getElementById('2411.15175v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15175v3-abstract-full" style="display: none;"> Effective toxic content detection relies heavily on high-quality and diverse data, which serve as the foundation for robust content moderation models. Synthetic data has become a common approach for training models across various NLP tasks. However, its effectiveness remains uncertain for highly subjective tasks like hate speech detection, with previous research yielding mixed results. This study explores the potential of open-source LLMs for harmful data synthesis, utilizing controlled prompting and supervised fine-tuning techniques to enhance data quality and diversity. 
6. arXiv:2411.15175 [pdf, other] | cs.CL, cs.AI
   ToxiLab: How Well Do Open-Source LLMs Generate Synthetic Toxicity Data?
   Authors: Zheng Hui, Zhaoxiao Guo, Hang Zhao, Juanyong Duan, Lin Ai, Yinheng Li, Julia Hirschberg, Congrui Huang
   Abstract: Effective toxic content detection relies heavily on high-quality and diverse data, which serve as the foundation for robust content moderation models. Synthetic data has become a common approach for training models across various NLP tasks. However, its effectiveness remains uncertain for highly subjective tasks like hate speech detection, with previous research yielding mixed results. This study explores the potential of open-source LLMs for harmful data synthesis, utilizing controlled prompting and supervised fine-tuning techniques to enhance data quality and diversity. We systematically evaluated six open-source LLMs on five datasets, assessing their ability to generate diverse, high-quality harmful data while minimizing hallucination and duplication. Our results show that Mistral consistently outperforms other open models, and supervised fine-tuning significantly enhances data reliability and diversity. We further analyze the trade-offs between prompt-based and fine-tuned toxic data synthesis, discuss real-world deployment challenges, and highlight ethical considerations. Our findings demonstrate that fine-tuned open-source LLMs provide scalable and cost-effective solutions to augment toxic content detection datasets, paving the way for more accessible and transparent content moderation tools.
   Submitted 18 February, 2025; v1 submitted 17 November, 2024; originally announced November 2024.
   Comments: 14 pages

7. arXiv:2409.18997 [pdf, other] | cs.CL, cs.AI, cs.SI
   PropaInsight: Toward Deeper Understanding of Propaganda in Terms of Techniques, Appeals, and Intent
   Authors: Jiateng Liu, Lin Ai, Zizhou Liu, Payam Karisani, Zheng Hui, May Fung, Preslav Nakov, Julia Hirschberg, Heng Ji
   Abstract: Propaganda plays a critical role in shaping public opinion and fueling disinformation. While existing research primarily focuses on identifying propaganda techniques, it lacks the ability to capture the broader motives and the impacts of such content. To address these challenges, we introduce PropaInsight, a conceptual framework grounded in foundational social science research, which systematically dissects propaganda into techniques, arousal appeals, and underlying intent. PropaInsight offers a more granular understanding of how propaganda operates across different contexts. Additionally, we present PropaGaze, a novel dataset that combines human-annotated data with high-quality synthetic data generated through a meticulously designed pipeline. Our experiments show that off-the-shelf LLMs struggle with propaganda analysis, but training with PropaGaze significantly improves performance. Fine-tuned Llama-7B-Chat achieves 203.4% higher text span IoU in technique identification and 66.2% higher BertScore in appeal analysis compared to 1-shot GPT-4-Turbo. Moreover, PropaGaze complements limited human-annotated data in data-sparse and cross-domain scenarios, showing its potential for comprehensive and generalizable propaganda analysis.
   Submitted 13 February, 2025; v1 submitted 19 September, 2024; originally announced September 2024.
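   The "text span IoU" metric cited in this abstract is, in its usual form, intersection over union of token-index spans; the sketch below shows that computation for one predicted/gold pair. The paper's exact matching and aggregation rules are assumptions.

   ```python
   # Token-level span IoU between a predicted and a gold span (half-open intervals).
   def span_iou(pred: tuple[int, int], gold: tuple[int, int]) -> float:
       """Spans are [start, end) token-index intervals."""
       inter = max(0, min(pred[1], gold[1]) - max(pred[0], gold[0]))
       union = (pred[1] - pred[0]) + (gold[1] - gold[0]) - inter
       return inter / union if union else 0.0

   print(span_iou((5, 15), (10, 20)))   # 5 overlapping tokens / 15 total = 0.333...
   ```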
8. arXiv:2409.14740 [pdf, other] | cs.CL, cs.AI
   ToxiCraft: A Novel Framework for Synthetic Generation of Harmful Information
   Authors: Zheng Hui, Zhaoxiao Guo, Hang Zhao, Juanyong Duan, Congrui Huang
   Abstract: In different NLP tasks, detecting harmful content is crucial for online environments, especially with the growing influence of social media. However, previous research has two main issues: 1) a lack of data in low-resource settings, and 2) inconsistent definitions and criteria for judging harmful content, requiring classification models to be robust to spurious features and diverse. We propose ToxiCraft, a novel framework for synthesizing datasets of harmful information to address these weaknesses. With only a small amount of seed data, our framework can generate a wide variety of synthetic, yet remarkably realistic, examples of toxic information. Experimentation across various datasets showcases a notable enhancement in detection model robustness and adaptability, surpassing or close to the gold labels. We release the generated data on GitHub upon acceptance.
   Submitted 23 September, 2024; originally announced September 2024.

9. arXiv:2409.08264 [pdf, other] | cs.AI
   Windows Agent Arena: Evaluating Multi-Modal OS Agents at Scale
   Authors: Rogerio Bonatti, Dan Zhao, Francesco Bonacci, Dillon Dupont, Sara Abdali, Yinheng Li, Yadong Lu, Justin Wagle, Kazuhito Koishida, Arthur Bucker, Lawrence Jang, Zack Hui
   Abstract: Large language models (LLMs) show remarkable potential to act as computer agents, enhancing human productivity and software accessibility in multi-modal tasks that require planning and reasoning. However, measuring agent performance in realistic environments remains a challenge since: (i) most benchmarks are limited to specific modalities or domains (e.g., text-only, web navigation, Q&A, coding) and (ii) full benchmark evaluations are slow (on the order of days) given the multi-step sequential nature of tasks. To address these challenges, we introduce the Windows Agent Arena: a reproducible, general environment focusing exclusively on the Windows operating system (OS) where agents can operate freely within a real Windows OS and use the same wide range of applications, tools, and web browsers available to human users when solving tasks. We adapt the OSWorld framework (Xie et al., 2024) to create 150+ diverse Windows tasks across representative domains that require agent abilities in planning, screen understanding, and tool usage. Our benchmark is scalable and can be seamlessly parallelized in Azure for a full benchmark evaluation in as little as 20 minutes. To demonstrate Windows Agent Arena's capabilities, we also introduce a new multi-modal agent, Navi. Our agent achieves a success rate of 19.5% in the Windows domain, compared to 74.5% performance of an unassisted human. Navi also demonstrates strong performance on another popular web-based benchmark, Mind2Web. We offer extensive quantitative and qualitative analysis of Navi's performance, and provide insights into the opportunities for future research in agent development and data generation using Windows Agent Arena. Webpage: https://microsoft.github.io/WindowsAgentArena Code: https://github.com/microsoft/WindowsAgentArena
   Submitted 13 September, 2024; v1 submitted 12 September, 2024; originally announced September 2024.

10. arXiv:2408.12871 [pdf] | cs.AI
    DeepDiveAI: Identifying AI Related Documents in Large Scale Literature Data
    Authors: Zhou Xiaochen, Liang Xingzhou, Zou Hui, Lu Yi, Qu Jingjing
    Abstract: This paper presents DeepDiveAI, a comprehensive dataset specifically curated to identify AI-related research papers from a large-scale academic literature database. The dataset was created using an advanced Long Short-Term Memory (LSTM) model trained on a binary classification task to distinguish between AI-related and non-AI-related papers. The model was trained and validated on a vast dataset, achieving high accuracy, precision, recall, and F1-score. The resulting DeepDiveAI dataset comprises over 9.4 million AI-related papers published since the Dartmouth Conference, from 1956 to 2024, providing a crucial resource for analyzing trends, thematic developments, and the evolution of AI research across various disciplines.
    Submitted 8 October, 2024; v1 submitted 23 August, 2024; originally announced August 2024.
    Comments: 28 pages and 10 figures
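    As a rough illustration of the kind of LSTM binary classifier this abstract describes, here is a minimal PyTorch model that maps a token-ID sequence to a single "AI-related" logit. The vocabulary size, dimensions, and use of the final hidden state are assumptions, not the paper's architecture.

    ```python
    # Minimal LSTM binary classifier sketch for document-level labeling.
    import torch
    import torch.nn as nn

    class TitleAbstractLSTM(nn.Module):
        def __init__(self, vocab_size: int = 30_000, embed_dim: int = 128, hidden: int = 256):
            super().__init__()
            self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
            self.lstm = nn.LSTM(embed_dim, hidden, batch_first=True)
            self.head = nn.Linear(hidden, 1)   # single logit: P(AI-related)

        def forward(self, token_ids: torch.Tensor) -> torch.Tensor:  # (batch, seq_len)
            x = self.embed(token_ids)
            _, (h_n, _) = self.lstm(x)              # h_n: (num_layers, batch, hidden)
            return self.head(h_n[-1]).squeeze(-1)   # (batch,) logits

    model = TitleAbstractLSTM()
    logits = model(torch.randint(1, 30_000, (4, 200)))   # 4 dummy documents
    loss = nn.BCEWithLogitsLoss()(logits, torch.tensor([1.0, 0.0, 1.0, 0.0]))
    print(logits.shape, float(loss))
    ```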
11. arXiv:2406.12263 [pdf, other] | cs.CL
    Defending Against Social Engineering Attacks in the Age of LLMs
    Authors: Lin Ai, Tharindu Kumarage, Amrita Bhattacharjee, Zizhou Liu, Zheng Hui, Michael Davinroy, James Cook, Laura Cassani, Kirill Trapeznikov, Matthias Kirchner, Arslan Basharat, Anthony Hoogs, Joshua Garland, Huan Liu, Julia Hirschberg
    Abstract: The proliferation of Large Language Models (LLMs) poses challenges in detecting and mitigating digital deception, as these models can emulate human conversational patterns and facilitate chat-based social engineering (CSE) attacks. This study investigates the dual capabilities of LLMs as both facilitators and defenders against CSE threats. We develop a novel dataset, SEConvo, simulating CSE scenarios in academic and recruitment contexts, and designed to examine how LLMs can be exploited in these situations. Our findings reveal that, while off-the-shelf LLMs generate high-quality CSE content, their detection capabilities are suboptimal, leading to increased operational costs for defense. In response, we propose ConvoSentinel, a modular defense pipeline that improves detection at both the message and the conversation levels, offering enhanced adaptability and cost-effectiveness. The retrieval-augmented module in ConvoSentinel identifies malicious intent by comparing messages to a database of similar conversations, enhancing CSE detection at all stages. Our study highlights the need for advanced strategies to leverage LLMs in cybersecurity.
    Submitted 11 October, 2024; v1 submitted 18 June, 2024; originally announced June 2024.
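    ConvoSentinel's retrieval-augmented module is described only at a high level here, so the following is a toy sketch of the general idea: embed an incoming message, retrieve its most similar labeled conversation snippets, and flag it when most neighbors are malicious. The hash-based embedding, neighbor count, and threshold are placeholders, not the paper's components.

    ```python
    # Toy retrieval-based flagging sketch: nearest labeled snippets vote on intent.
    import numpy as np

    def embed(text: str, dim: int = 64) -> np.ndarray:
        """Stand-in embedding: hashed bag of words (replace with a real encoder)."""
        v = np.zeros(dim)
        for tok in text.lower().split():
            v[hash(tok) % dim] += 1.0
        n = np.linalg.norm(v)
        return v / n if n else v

    def flag_message(message: str, db_texts: list[str], db_labels: list[int],
                     k: int = 3, threshold: float = 0.5) -> bool:
        """True if the majority of the k most similar snippets are malicious (label 1)."""
        q = embed(message)
        sims = np.array([q @ embed(t) for t in db_texts])   # cosine similarity of unit vectors
        top = sims.argsort()[::-1][:k]
        return float(np.mean([db_labels[i] for i in top])) >= threshold

    db = ["please share your bank login", "let's schedule the interview",
          "send the verification code you received", "attaching the meeting agenda"]
    labels = [1, 0, 1, 0]
    print(flag_message("could you send me the code from your phone", db, labels))
    ```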
Due to historical reasons, the formal proof languages adopted by traditional theorem provers were not intended to represent natural language proofs. Therefore, they are not well-suited for the aforementi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07973v1-abstract-full').style.display = 'inline'; document.getElementById('2405.07973v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07973v1-abstract-full" style="display: none;"> Artificial intelligence assisted mathematical proof has become a highly focused area nowadays. One key problem in this field is to generate formal mathematical proofs from natural language proofs. Due to historical reasons, the formal proof languages adopted by traditional theorem provers were not intended to represent natural language proofs. Therefore, they are not well-suited for the aforementioned tasks and proof-checking work for educational purposes. In this paper, we design a proof language and its corresponding abstract syntax tree and implement a proof checking tool for it. This language can be easily converted from natural language, thus providing a rich corpus of formal proof. Additionally, it supports the handling of issues in informal proofs through static analysis, and enhances the expressive power of the language by introducing the structure of partial proofs. This design combines the expressiveness of natural language and the accuracy of formal language, resulting in an improved mathematical proof language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07973v1-abstract-full').style.display = 'none'; document.getElementById('2405.07973v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.02608">arXiv:2405.02608</a> <span> [<a href="https://arxiv.org/pdf/2405.02608">pdf</a>, <a href="https://arxiv.org/format/2405.02608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+S">Shuai Yuan</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Lei Luo</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhuo Hui</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+C">Can Pu</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+X">Xiaoyu Xiang</a>, <a href="/search/cs?searchtype=author&query=Ranjan%2C+R">Rakesh Ranjan</a>, <a href="/search/cs?searchtype=author&query=Demandolx%2C+D">Denis Demandolx</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.02608v1-abstract-short" style="display: inline;"> Traditional unsupervised optical flow methods are vulnerable to occlusions and motion boundaries due to lack of object-level information. Therefore, we propose UnSAMFlow, an unsupervised flow network that also leverages object information from the latest foundation model Segment Anything Model (SAM). We first include a self-supervised semantic augmentation module tailored to SAM masks. We also ana… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.02608v1-abstract-full').style.display = 'inline'; document.getElementById('2405.02608v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.02608v1-abstract-full" style="display: none;"> Traditional unsupervised optical flow methods are vulnerable to occlusions and motion boundaries due to lack of object-level information. Therefore, we propose UnSAMFlow, an unsupervised flow network that also leverages object information from the latest foundation model Segment Anything Model (SAM). We first include a self-supervised semantic augmentation module tailored to SAM masks. We also analyze the poor gradient landscapes of traditional smoothness losses and propose a new smoothness definition based on homography instead. A simple yet effective mask feature module has also been added to further aggregate features on the object level. With all these adaptations, our method produces clear optical flow estimation with sharp boundaries around objects, which outperforms state-of-the-art methods on both KITTI and Sintel datasets. Our method also generalizes well across domains and runs very efficiently. 
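<p class="is-size-7">The abstract above mentions a mask feature module that aggregates features at the object level. A minimal sketch of that general idea, assuming PyTorch and SAM-style binary masks (function and variable names are illustrative, not the authors' released code): average the dense features inside each mask and broadcast the pooled vector back to the object's pixels.</p> <pre><code>
# Minimal sketch (assumed PyTorch, illustrative names): pool dense features
# inside each segmentation mask and copy the pooled vector back to the
# object's pixels, so every pixel of an object carries one object-level
# descriptor -- the general flavor of an object-level mask feature module.
import torch

def mask_average_pool(feat: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    """feat: (C, H, W) feature map; masks: (N, H, W) binary object masks."""
    C = feat.shape[0]
    out = torch.zeros_like(feat)
    for i in range(masks.shape[0]):
        m = masks[i].bool()
        k = int(m.sum())
        if k == 0:
            continue
        pooled = feat[:, m].mean(dim=1)                  # (C,) mean feature of object i
        out[:, m] = pooled.unsqueeze(1).expand(C, k)     # write it back to the object's pixels
    return out

feat = torch.randn(8, 64, 64)
masks = torch.zeros(2, 64, 64)
masks[0, :32] = 1                                        # two toy "objects"
masks[1, 32:] = 1
print(mask_average_pool(feat, masks).shape)              # torch.Size([8, 64, 64])
</code></pre>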
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.02608v1-abstract-full').style.display = 'none'; document.getElementById('2405.02608v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024. Code is available at https://github.com/facebookresearch/UnSAMFlow</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17991">arXiv:2404.17991</a> <span> [<a href="https://arxiv.org/pdf/2404.17991">pdf</a>, <a href="https://arxiv.org/format/2404.17991">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Pre-Trained Generative Language Models with Question Attended Span Extraction on Machine Reading Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ai%2C+L">Lin Ai</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zizhou Liu</a>, <a href="/search/cs?searchtype=author&query=Hirschberg%2C+J">Julia Hirschberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17991v3-abstract-short" style="display: inline;"> Machine Reading Comprehension (MRC) poses a significant challenge in the field of Natural Language Processing (NLP). While mainstream MRC methods predominantly leverage extractive strategies using encoder-only models such as BERT, generative approaches face the issue of out-of-control generation -- a critical problem where answers generated are often incorrect, irrelevant, or unfaithful to the sou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17991v3-abstract-full').style.display = 'inline'; document.getElementById('2404.17991v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17991v3-abstract-full" style="display: none;"> Machine Reading Comprehension (MRC) poses a significant challenge in the field of Natural Language Processing (NLP). While mainstream MRC methods predominantly leverage extractive strategies using encoder-only models such as BERT, generative approaches face the issue of out-of-control generation -- a critical problem where answers generated are often incorrect, irrelevant, or unfaithful to the source text. To address these limitations in generative models for MRC, we introduce the Question-Attended Span Extraction (QASE) module. Integrated during the fine-tuning phase of pre-trained generative language models (PLMs), QASE significantly enhances their performance, allowing them to surpass the extractive capabilities of advanced Large Language Models (LLMs) such as GPT-4 in few-shot settings. Notably, these gains in performance do not come with an increase in computational demands. 
The efficacy of the QASE module has been rigorously tested across various datasets, consistently achieving or even surpassing state-of-the-art (SOTA) results, thereby bridging the gap between generative and extractive models in extractive MRC tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17991v3-abstract-full').style.display = 'none'; document.getElementById('2404.17991v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2403.04771</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05438">arXiv:2403.05438</a> <span> [<a href="https://arxiv.org/pdf/2403.05438">pdf</a>, <a href="https://arxiv.org/format/2403.05438">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VideoElevator: Elevating Video Generation Quality with Versatile Text-to-Image Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yabo Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yuxiang Wei</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xianhui Lin</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+P">Peiran Ren</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xuansong Xie</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+X">Xiangyang Ji</a>, <a href="/search/cs?searchtype=author&query=Zuo%2C+W">Wangmeng Zuo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05438v1-abstract-short" style="display: inline;"> Text-to-image diffusion models (T2I) have demonstrated unprecedented capabilities in creating realistic and aesthetic images. On the contrary, text-to-video diffusion models (T2V) still lag far behind in frame quality and text alignment, owing to insufficient quality and quantity of training videos. In this paper, we introduce VideoElevator, a training-free and plug-and-play method, which elevates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05438v1-abstract-full').style.display = 'inline'; document.getElementById('2403.05438v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05438v1-abstract-full" style="display: none;"> Text-to-image diffusion models (T2I) have demonstrated unprecedented capabilities in creating realistic and aesthetic images. On the contrary, text-to-video diffusion models (T2V) still lag far behind in frame quality and text alignment, owing to insufficient quality and quantity of training videos. 
In this paper, we introduce VideoElevator, a training-free and plug-and-play method, which elevates the performance of T2V using superior capabilities of T2I. Different from conventional T2V sampling (i.e., temporal and spatial modeling), VideoElevator explicitly decomposes each sampling step into temporal motion refining and spatial quality elevating. Specifically, temporal motion refining uses encapsulated T2V to enhance temporal consistency, followed by inverting to the noise distribution required by T2I. Then, spatial quality elevating harnesses inflated T2I to directly predict less noisy latent, adding more photo-realistic details. We have conducted experiments in extensive prompts under the combination of various T2V and T2I. The results show that VideoElevator not only improves the performance of T2V baselines with foundational T2I, but also facilitates stylistic video synthesis with personalized T2I. Our code is available at https://github.com/YBYBZhang/VideoElevator. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05438v1-abstract-full').style.display = 'none'; document.getElementById('2403.05438v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://videoelevator.github.io Code: https://github.com/YBYBZhang/VideoElevator</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04771">arXiv:2403.04771</a> <span> [<a href="https://arxiv.org/pdf/2403.04771">pdf</a>, <a href="https://arxiv.org/format/2403.04771">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> QASE Enhanced PLMs: Improved Control in Text Generation for MRC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ai%2C+L">Lin Ai</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zizhou Liu</a>, <a href="/search/cs?searchtype=author&query=Hirschberg%2C+J">Julia Hirschberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.04771v1-abstract-short" style="display: inline;"> To address the challenges of out-of-control generation in generative models for machine reading comprehension (MRC), we introduce the Question-Attended Span Extraction (QASE) module. 
Integrated during the fine-tuning of pre-trained generative language models (PLMs), QASE enables these PLMs to match SOTA extractive methods and outperform leading LLMs like GPT-4 in MRC tasks, without significant inc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04771v1-abstract-full').style.display = 'inline'; document.getElementById('2403.04771v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.04771v1-abstract-full" style="display: none;"> To address the challenges of out-of-control generation in generative models for machine reading comprehension (MRC), we introduce the Question-Attended Span Extraction (QASE) module. Integrated during the fine-tuning of pre-trained generative language models (PLMs), QASE enables these PLMs to match SOTA extractive methods and outperform leading LLMs like GPT-4 in MRC tasks, without significant increases in computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04771v1-abstract-full').style.display = 'none'; document.getElementById('2403.04771v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.09059">arXiv:2312.09059</a> <span> [<a href="https://arxiv.org/pdf/2312.09059">pdf</a>, <a href="https://arxiv.org/format/2312.09059">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Auto-Prox: Training-Free Vision Transformer Architecture Search via Automatic Proxy Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zimian Wei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lujun Li</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+P">Peijie Dong</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Anggeng Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+M">Menglong Lu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+H">Hengyue Pan</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zhiliang Tian</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.09059v1-abstract-short" style="display: inline;"> The substantial success of Vision Transformer (ViT) in computer vision tasks is largely attributed to the architecture design. This underscores the necessity of efficient architecture search for designing better ViTs automatically. 
As training-based architecture search methods are computationally intensive, there is a growing interest in training-free methods that use zero-cost proxies to score Vi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09059v1-abstract-full').style.display = 'inline'; document.getElementById('2312.09059v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.09059v1-abstract-full" style="display: none;"> The substantial success of Vision Transformer (ViT) in computer vision tasks is largely attributed to the architecture design. This underscores the necessity of efficient architecture search for designing better ViTs automatically. As training-based architecture search methods are computationally intensive, there is a growing interest in training-free methods that use zero-cost proxies to score ViTs. However, existing training-free approaches require expert knowledge to manually design specific zero-cost proxies. Moreover, these zero-cost proxies exhibit limitations to generalize across diverse domains. In this paper, we introduce Auto-Prox, an automatic proxy discovery framework, to address the problem. First, we build the ViT-Bench-101, which involves different ViT candidates and their actual performance on multiple datasets. Utilizing ViT-Bench-101, we can evaluate zero-cost proxies based on their score-accuracy correlation. Then, we represent zero-cost proxies with computation graphs and organize the zero-cost proxy search space with ViT statistics and primitive operations. To discover generic zero-cost proxies, we propose a joint correlation metric to evolve and mutate different zero-cost proxy candidates. We introduce an elitism-preserve strategy for search efficiency to achieve a better trade-off between exploitation and exploration. Based on the discovered zero-cost proxy, we conduct a ViT architecture search in a training-free manner. Extensive experiments demonstrate that our method generalizes well to different datasets and achieves state-of-the-art results both in ranking correlation and final accuracy. Codes can be found at https://github.com/lilujunai/Auto-Prox-AAAI24. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09059v1-abstract-full').style.display = 'none'; document.getElementById('2312.09059v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
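<p class="is-size-7">A minimal sketch of the kind of joint correlation metric the abstract above describes for scoring zero-cost proxies, assuming Python with scipy and invented names (not the released Auto-Prox code): rank-correlate a proxy's scores with the measured accuracies on each dataset, then combine the per-dataset correlations.</p> <pre><code>
# Illustrative sketch only (invented names, scipy required): rate a candidate
# zero-cost proxy by the Kendall rank correlation between its scores and the
# measured accuracies on each dataset, then average the per-dataset taus into
# a single joint metric.
from scipy.stats import kendalltau

def joint_correlation(proxy_scores: dict, accuracies: dict) -> float:
    """Both arguments map dataset name to a list of values, one per ViT candidate."""
    taus = []
    for name in accuracies:
        tau, _ = kendalltau(proxy_scores[name], accuracies[name])
        taus.append(tau)
    return sum(taus) / len(taus)

scores = {"cifar100": [0.2, 0.9, 0.5], "flowers": [0.1, 0.8, 0.6]}
accs = {"cifar100": [61.0, 74.2, 68.5], "flowers": [88.1, 93.4, 91.0]}
print(round(joint_correlation(scores, accs), 3))   # 1.0 for these toy numbers
</code></pre>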
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05107">arXiv:2312.05107</a> <span> [<a href="https://arxiv.org/pdf/2312.05107">pdf</a>, <a href="https://arxiv.org/format/2312.05107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DreaMoving: A Human Video Generation Framework based on Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+M">Mengyang Feng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinlin Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+K">Kai Yu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuan Yao</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xiefan Guo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xianhui Lin</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+H">Haolan Xue</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+C">Chen Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaowen Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Aojie Li</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+X">Xiaoyang Kang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+B">Biwen Lei</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+M">Miaomiao Cui</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+P">Peiran Ren</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xuansong Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05107v2-abstract-short" style="display: inline;"> In this paper, we present DreaMoving, a diffusion-based controllable video generation framework to produce high-quality customized human videos. Specifically, given target identity and posture sequences, DreaMoving can generate a video of the target identity moving or dancing anywhere driven by the posture sequences. To this end, we propose a Video ControlNet for motion-controlling and a Content G… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05107v2-abstract-full').style.display = 'inline'; document.getElementById('2312.05107v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05107v2-abstract-full" style="display: none;"> In this paper, we present DreaMoving, a diffusion-based controllable video generation framework to produce high-quality customized human videos. Specifically, given target identity and posture sequences, DreaMoving can generate a video of the target identity moving or dancing anywhere driven by the posture sequences. To this end, we propose a Video ControlNet for motion-controlling and a Content Guider for identity preserving. The proposed model is easy to use and can be adapted to most stylized diffusion models to generate diverse results.
The project page is available at https://dreamoving.github.io/dreamoving <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05107v2-abstract-full').style.display = 'none'; document.getElementById('2312.05107v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 5 figures, Tech. Report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.12398">arXiv:2305.12398</a> <span> [<a href="https://arxiv.org/pdf/2305.12398">pdf</a>, <a href="https://arxiv.org/format/2305.12398">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Language Knowledge-Assisted Representation Learning for Skeleton-Based Action Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Haojun Xu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yan Gao</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jie Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xinbo Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.12398v1-abstract-short" style="display: inline;"> How humans understand and recognize the actions of others is a complex neuroscientific problem that involves a combination of cognitive mechanisms and neural networks. Research has shown that humans have brain areas that recognize actions that process top-down attentional information, such as the temporoparietal association area. Also, humans have brain regions dedicated to understanding the minds… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12398v1-abstract-full').style.display = 'inline'; document.getElementById('2305.12398v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.12398v1-abstract-full" style="display: none;"> How humans understand and recognize the actions of others is a complex neuroscientific problem that involves a combination of cognitive mechanisms and neural networks. Research has shown that humans have brain areas that recognize actions that process top-down attentional information, such as the temporoparietal association area. Also, humans have brain regions dedicated to understanding the minds of others and analyzing their intentions, such as the medial prefrontal cortex of the temporal lobe. Skeleton-based action recognition creates mappings for the complex connections between the human skeleton movement patterns and behaviors. 
Although existing studies encoded meaningful node relationships and synthesized action representations for classification with good results, few of them considered incorporating a priori knowledge to aid potential representation learning for better performance. LA-GCN proposes a graph convolution network using large-scale language models (LLM) knowledge assistance. First, the LLM knowledge is mapped into a priori global relationship (GPR) topology and a priori category relationship (CPR) topology between nodes. The GPR guides the generation of new "bone" representations, aiming to emphasize essential node information from the data level. The CPR mapping simulates category prior knowledge in human brain regions, encoded by the PC-AC module and used to add additional supervision-forcing the model to learn class-distinguishable features. In addition, to improve information transfer efficiency in topology modeling, we propose multi-hop attention graph convolution. It aggregates each node's k-order neighbor simultaneously to speed up model convergence. LA-GCN reaches state-of-the-art on NTU RGB+D, NTU RGB+D 120, and NW-UCLA datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12398v1-abstract-full').style.display = 'none'; document.getElementById('2305.12398v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">first upload with 13 pages and 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.06176">arXiv:2305.06176</a> <span> [<a href="https://arxiv.org/pdf/2305.06176">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fine-tuning Language Models with Generative Adversarial Reward Modelling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Z+Z">Zhang Ze Yu</a>, <a href="/search/cs?searchtype=author&query=Jaw%2C+L+J">Lau Jia Jaw</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhang Hui</a>, <a href="/search/cs?searchtype=author&query=Low%2C+B+K+H">Bryan Kian Hsiang Low</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.06176v3-abstract-short" style="display: inline;"> Reinforcement Learning with Human Feedback (RLHF) has been demonstrated to significantly enhance the performance of large language models (LLMs) by aligning their outputs with desired human values through instruction tuning. However, RLHF is constrained by the expertise and productivity limitations of human evaluators. 
A response to this downside is to fall back to supervised fine-tuning (SFT) wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06176v3-abstract-full').style.display = 'inline'; document.getElementById('2305.06176v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.06176v3-abstract-full" style="display: none;"> Reinforcement Learning with Human Feedback (RLHF) has been demonstrated to significantly enhance the performance of large language models (LLMs) by aligning their outputs with desired human values through instruction tuning. However, RLHF is constrained by the expertise and productivity limitations of human evaluators. A response to this downside is to fall back to supervised fine-tuning (SFT) with additional carefully selected expert demonstrations. However, while this method has been proven to be effective, it invariably also leads to increased human-in-the-loop overhead. In this study, we propose another alternative approach: Reinforcement Learning with Generative Adversarial Feedback (RLGAF) to RLHF and SFT, which uses a generative adversarial training style to enable the LLMs to learn useful human expert demonstrations without being directly exposed to the training examples, thus enabling good generalization capabilities while preserving sample efficiency. Our preliminary findings indicate that RLGAF can help align LLMs outputs with competitive performance against RLHF and SFT, while not suffering from their respective inherent restrictions, suggesting promising avenues for further research on automating AI alignment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06176v3-abstract-full').style.display = 'none'; document.getElementById('2305.06176v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 9 figures, 12 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.03814">arXiv:2305.03814</a> <span> [<a href="https://arxiv.org/pdf/2305.03814">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Deep Labeling of fMRI Brain Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Latheef%2C+A+A+P">Ammar Ahmed Pallikonda Latheef</a>, <a href="/search/cs?searchtype=author&query=Ghate%2C+S">Sejal Ghate</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhipeng Hui</a>, <a href="/search/cs?searchtype=author&query=Santamaria-Pang%2C+A">Alberto Santamaria-Pang</a>, <a href="/search/cs?searchtype=author&query=Tarapov%2C+I">Ivan Tarapov</a>, <a href="/search/cs?searchtype=author&query=Sair%2C+H+I">Haris I Sair</a>, <a href="/search/cs?searchtype=author&query=Jones%2C+C+K">Craig K Jones</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.03814v1-abstract-short" style="display: inline;"> Resting State Networks (RSNs) of the brain extracted from Resting State functional Magnetic Resonance Imaging (RS-fMRI) are used in the pre-surgical planning to guide the neurosurgeon. This is difficult, though, as expert knowledge is required to label each of the RSNs. There is a lack of efficient and standardized methods to be used in clinical workflows. Additionally, these methods need to be ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03814v1-abstract-full').style.display = 'inline'; document.getElementById('2305.03814v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.03814v1-abstract-full" style="display: none;"> Resting State Networks (RSNs) of the brain extracted from Resting State functional Magnetic Resonance Imaging (RS-fMRI) are used in the pre-surgical planning to guide the neurosurgeon. This is difficult, though, as expert knowledge is required to label each of the RSNs. There is a lack of efficient and standardized methods to be used in clinical workflows. Additionally, these methods need to be generalizable since the method needs to work well regardless of the acquisition technique. We propose an accurate, fast, and lightweight deep learning approach to label RSNs. Group Independent Component Analysis (ICA) was used to extract large scale functional connectivity patterns in the cohort and dual regression was used to back project them on individual subject RSNs. We compare a Multi-Layer Perceptron (MLP) based method with 2D and 3D Convolutional Neural Networks (CNNs) and find that the MLP is faster and more accurate. The MLP method performs as good or better than other works despite its compact size. 
We prove the generalizability of our method by showing that the MLP performs at 100% accuracy in the holdout dataset and 98.3% accuracy in three other sites' fMRI acquisitions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03814v1-abstract-full').style.display = 'none'; document.getElementById('2305.03814v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 10 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.16493">arXiv:2303.16493</a> <span> [<a href="https://arxiv.org/pdf/2303.16493">pdf</a>, <a href="https://arxiv.org/format/2303.16493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AnyFlow: Arbitrary Scale Optical Flow with Implicit Neural Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jung%2C+H">Hyunyoung Jung</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhuo Hui</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Lei Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haitao Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&query=Yoo%2C+S">Sungjoo Yoo</a>, <a href="/search/cs?searchtype=author&query=Ranjan%2C+R">Rakesh Ranjan</a>, <a href="/search/cs?searchtype=author&query=Demandolx%2C+D">Denis Demandolx</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.16493v1-abstract-short" style="display: inline;"> To apply optical flow in practice, it is often necessary to resize the input to smaller dimensions in order to reduce computational costs. However, downsizing inputs makes the estimation more challenging because objects and motion ranges become smaller. Even though recent approaches have demonstrated high-quality flow estimation, they tend to fail to accurately model small objects and precise boun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.16493v1-abstract-full').style.display = 'inline'; document.getElementById('2303.16493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.16493v1-abstract-full" style="display: none;"> To apply optical flow in practice, it is often necessary to resize the input to smaller dimensions in order to reduce computational costs. However, downsizing inputs makes the estimation more challenging because objects and motion ranges become smaller. Even though recent approaches have demonstrated high-quality flow estimation, they tend to fail to accurately model small objects and precise boundaries when the input resolution is lowered, restricting their applicability to high-resolution inputs. 
In this paper, we introduce AnyFlow, a robust network that estimates accurate flow from images of various resolutions. By modeling optical flow as a continuous coordinate-based representation, AnyFlow generates outputs at arbitrary scales from low-resolution inputs, demonstrating superior performance over prior works in capturing tiny objects with detail preservation on a wide range of scenes. We establish new state-of-the-art performance in cross-dataset generalization on the KITTI dataset, while achieving accuracy comparable to other SOTA methods on the online benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.16493v1-abstract-full').style.display = 'none'; document.getElementById('2303.16493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2023 (Highlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.02014">arXiv:2301.02014</a> <span> [<a href="https://arxiv.org/pdf/2301.02014">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> C Sequential Optimization Numbers Group </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zile Hui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.02014v1-abstract-short" style="display: inline;"> We define C sequential optimization numbers, where C is a (k+1)-tuple vector. We prove that the unsigned Stirling numbers of the first kind are (0,1) sequential optimization numbers. Many achievements of the Stirling numbers of the first kind can be transformed into the properties of C sequential optimization numbers. We give some examples such as the recurrence formula and an instance of C sequential optim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.02014v1-abstract-full').style.display = 'inline'; document.getElementById('2301.02014v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.02014v1-abstract-full" style="display: none;"> We define C sequential optimization numbers, where C is a (k+1)-tuple vector. We prove that the unsigned Stirling numbers of the first kind are (0,1) sequential optimization numbers. Many achievements of the Stirling numbers of the first kind can be transformed into the properties of C sequential optimization numbers. We give some examples such as the recurrence formula and an instance of C sequential optimization numbers. We also extend some properties such as an upper bound for them.
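<p class="is-size-7">The abstract above relates these numbers to the unsigned Stirling numbers of the first kind. A minimal sketch of the classical recurrence for those Stirling numbers, c(n, m) = c(n-1, m-1) + (n-1)·c(n-1, m), in Python; the identification with (0,1) sequential optimization numbers is the paper's result, and its C-vector generalization is not reproduced here.</p> <pre><code>
# Sketch of the classical recurrence for the unsigned Stirling numbers of the
# first kind, c(n, m) = c(n-1, m-1) + (n-1)*c(n-1, m), which the abstract
# identifies with the (0,1) sequential optimization numbers.  The paper's
# C-vector generalization is not shown here.
from functools import lru_cache

@lru_cache(maxsize=None)
def stirling1_unsigned(n: int, m: int) -> int:
    if n == 0 and m == 0:
        return 1
    if n == 0 or m == 0:
        return 0
    return stirling1_unsigned(n - 1, m - 1) + (n - 1) * stirling1_unsigned(n - 1, m)

print([stirling1_unsigned(4, m) for m in range(5)])       # [0, 6, 11, 6, 1]
print(sum(stirling1_unsigned(4, m) for m in range(5)))    # 24 = 4!
</code></pre>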
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.02014v1-abstract-full').style.display = 'none'; document.getElementById('2301.02014v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03885">arXiv:2211.03885</a> <span> [<a href="https://arxiv.org/pdf/2211.03885">pdf</a>, <a href="https://arxiv.org/format/2211.03885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Learned Smartphone ISP on Mobile GPUs with Deep Learning, Mobile AI & AIM 2022 Challenge: Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ignatov%2C+A">Andrey Ignatov</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuai Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chaoyu Feng</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+F">Furui Bai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaotao Wang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+L">Lei Lei</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+Z">Ziyao Yi</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+Y">Yan Xiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zibin Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shaoqing Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+K">Keming Shi</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+D">Dehui Kong</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Ke Xu</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+M">Minsu Kwon</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yaqi Wu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jiesi Zheng</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Z">Zhihao Fan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xun Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Feng Zhang</a>, <a href="/search/cs?searchtype=author&query=No%2C+A">Albert No</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+M">Minhyeok Cho</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zewen Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaze Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ran Li</a> , et al. 
(13 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03885v1-abstract-short" style="display: inline;"> The role of mobile cameras increased dramatically over the past few years, leading to more and more research in automatic image quality enhancement and RAW photo processing. In this Mobile AI challenge, the target was to develop an efficient end-to-end AI-based image signal processing (ISP) pipeline replacing the standard mobile ISPs that can run on modern smartphone GPUs using TensorFlow Lite. Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03885v1-abstract-full').style.display = 'inline'; document.getElementById('2211.03885v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03885v1-abstract-full" style="display: none;"> The role of mobile cameras increased dramatically over the past few years, leading to more and more research in automatic image quality enhancement and RAW photo processing. In this Mobile AI challenge, the target was to develop an efficient end-to-end AI-based image signal processing (ISP) pipeline replacing the standard mobile ISPs that can run on modern smartphone GPUs using TensorFlow Lite. The participants were provided with a large-scale Fujifilm UltraISP dataset consisting of thousands of paired photos captured with a normal mobile camera sensor and a professional 102MP medium-format FujiFilm GFX100 camera. The runtime of the resulting models was evaluated on the Snapdragon's 8 Gen 1 GPU that provides excellent acceleration results for the majority of common deep learning ops. The proposed solutions are compatible with all recent mobile GPUs, being able to process Full HD photos in less than 20-50 milliseconds while achieving high fidelity results. A detailed description of all models developed in this challenge is provided in this paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03885v1-abstract-full').style.display = 'none'; document.getElementById('2211.03885v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.07052">arXiv:2206.07052</a> <span> [<a href="https://arxiv.org/pdf/2206.07052">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Sequential Optimization Numbers and Conjecture about Edge-Symmetry and Weight-Symmetry Shortest Weight-Constrained Path </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zile Hui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.07052v1-abstract-short" style="display: inline;"> This paper defines multidimensional sequential optimization numbers and proves that the unsigned Stirling numbers of the first kind are 1-dimensional sequential optimization numbers. This paper gives a recurrence formula and an upper bound of multidimensional sequential optimization numbers. We prove that the k-dimensional sequential optimization numbers, denoted by O_k (n,m), are almost in {O_k (n,a)}… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07052v1-abstract-full').style.display = 'inline'; document.getElementById('2206.07052v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.07052v1-abstract-full" style="display: none;"> This paper defines multidimensional sequential optimization numbers and proves that the unsigned Stirling numbers of the first kind are 1-dimensional sequential optimization numbers. This paper gives a recurrence formula and an upper bound of multidimensional sequential optimization numbers. We prove that the k-dimensional sequential optimization numbers, denoted by O_k (n,m), are almost in {O_k (n,a)}, where a belongs to [1, eklog(n-1)+(epi)^2/6(2^k-1)+M_1], n is the size of k-dimensional sequential optimization numbers, and M_1 is a large positive integer. Many achievements of the Stirling numbers of the first kind can be transformed into the properties of k-dimensional sequential optimization numbers by k-dimensional extension, and we give some examples. The shortest weight-constrained path problem is NP-complete [1]. In the case of edge symmetry and weight symmetry, we use the definition of the optimization set to design a 2-dimensional Bellman-Ford algorithm to solve it. According to the fact that P_1 (n,m>M) is less than or equal to e^(-M_1), where M=elog(n-1)+e+M_1, M_1 is a positive integer, and P_1 (n,m) is the probability of 1-dimensional sequential optimization numbers, this paper conjectures that the probability of solving the edge-symmetry and weight-symmetry shortest weight-constrained path problem in polynomial time approaches 1 exponentially with the increase of the constant term in the algorithm's complexity. The results of a large number of simulation experiments agree with this conjecture.
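<p class="is-size-7">The abstract above mentions a 2-dimensional Bellman-Ford algorithm for the weight-constrained shortest path problem. A minimal sketch, assuming nonnegative edge lengths and weights, of one standard way to realize a two-dimensional Bellman-Ford-style relaxation: keep the Pareto-nondominated (length, weight) labels at every vertex. This illustrates only the generic label-correcting idea, not the paper's edge-symmetric, weight-symmetric construction or its optimization-set machinery.</p> <pre><code>
# Generic sketch (not the paper's construction): a Bellman-Ford-style relaxation
# that keeps, for every vertex, the set of Pareto-nondominated (length, weight)
# labels and finally returns the shortest length whose total weight stays within
# the budget W.  Assumes nonnegative edge lengths and weights.
def constrained_shortest_path(n, edges, source, target, W):
    """edges: list of (u, v, length, weight); vertices are numbered 0..n-1."""
    labels = [set() for _ in range(n)]            # per-vertex set of (length, weight)
    labels[source].add((0, 0))
    for _ in range(n - 1):                        # Bellman-Ford-style rounds
        updated = False
        for u, v, length, weight in edges:
            for dl, dw in list(labels[u]):
                cand = (dl + length, dw + weight)
                if cand[1] > W:
                    continue                      # violates the weight budget
                if any(lab[0] <= cand[0] and lab[1] <= cand[1] for lab in labels[v]):
                    continue                      # an existing label dominates it
                labels[v] = {lab for lab in labels[v]
                             if not (cand[0] <= lab[0] and cand[1] <= lab[1])}
                labels[v].add(cand)
                updated = True
        if not updated:
            break
    feasible = [dl for dl, dw in labels[target]]
    return min(feasible) if feasible else float("inf")

edges = [(0, 1, 1, 5), (1, 2, 1, 5), (0, 2, 4, 1)]
print(constrained_shortest_path(3, edges, source=0, target=2, W=6))   # 4
</code></pre>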
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07052v1-abstract-full').style.display = 'none'; document.getElementById('2206.07052v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.05675">arXiv:2205.05675</a> <span> [<a href="https://arxiv.org/pdf/2205.05675">pdf</a>, <a href="https://arxiv.org/format/2205.05675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> NTIRE 2022 Challenge on Efficient Super-Resolution: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yawei Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kai Zhang</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Van+Gool%2C+L">Luc Van Gool</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+F">Fangyuan Kong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mingxi Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Songwei Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zongcai Du</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Ding Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chenhui Zhou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingyi Chen</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Q">Qingrui Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zheyuan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yingqi Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiangyu Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+H">Haoming Cai</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+C">Chao Dong</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Long Sun</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+J">Jinshan Pan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yi Zhu</a>, <a href="/search/cs?searchtype=author&query=Zong%2C+Z">Zhikai Zong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoxiao Liu</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tao Yang</a> , et al. (86 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.05675v1-abstract-short" style="display: inline;"> This paper reviews the NTIRE 2022 challenge on efficient single image super-resolution with focus on the proposed solutions and results. The task of the challenge was to super-resolve an input image with a magnification factor of $\times$4 based on pairs of low and corresponding high resolution images. 
The aim was to design a network for single image super-resolution that achieved improvement of e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05675v1-abstract-full').style.display = 'inline'; document.getElementById('2205.05675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.05675v1-abstract-full" style="display: none;"> This paper reviews the NTIRE 2022 challenge on efficient single image super-resolution with focus on the proposed solutions and results. The task of the challenge was to super-resolve an input image with a magnification factor of $\times$4 based on pairs of low and corresponding high resolution images. The aim was to design a network for single image super-resolution that achieved improvement of efficiency measured according to several metrics including runtime, parameters, FLOPs, activations, and memory consumption while at least maintaining the PSNR of 29.00dB on DIV2K validation set. IMDN is set as the baseline for efficiency measurement. The challenge had 3 tracks including the main track (runtime), sub-track one (model complexity), and sub-track two (overall performance). In the main track, the practical runtime performance of the submissions was evaluated. The rank of the teams were determined directly by the absolute value of the average runtime on the validation set and test set. In sub-track one, the number of parameters and FLOPs were considered. And the individual rankings of the two metrics were summed up to determine a final ranking in this track. In sub-track two, all of the five metrics mentioned in the description of the challenge including runtime, parameter count, FLOPs, activations, and memory consumption were considered. Similar to sub-track one, the rankings of five metrics were summed up to determine a final ranking. The challenge had 303 registered participants, and 43 teams made valid submissions. They gauge the state-of-the-art in efficient single image super-resolution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05675v1-abstract-full').style.display = 'none'; document.getElementById('2205.05675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Validation code of the baseline model is available at https://github.com/ofsoundof/IMDN. 
arXiv:2105.08826 [pdf, other] eess.IV cs.CV cs.LG
Real-Time Video Super-Resolution on Smartphones with Deep Learning, Mobile AI 2021 Challenge: Report
Authors: Andrey Ignatov, Andres Romero, Heewon Kim, Radu Timofte, Chiu Man Ho, Zibo Meng, Kyoung Mu Lee, Yuxiang Chen, Yutong Wang, Zeyu Long, Chenhao Wang, Yifei Chen, Boshen Xu, Shuhang Gu, Lixin Duan, Wen Li, Wang Bofei, Zhang Diankai, Zheng Chengjian, Liu Shaoli, Gao Si, Zhang Xiaofeng, Lu Kaidi, Xu Tianyu, Zheng Hui, et al. (6 additional authors not shown)
Abstract: Video super-resolution has recently become one of the most important mobile-related problems due to the rise of video communication and streaming services. While many solutions have been proposed for this task, the majority of them are too computationally expensive to run on portable devices with limited hardware resources. To address this problem, we introduce the first Mobile AI challenge, where the target is to develop end-to-end deep learning-based video super-resolution solutions that can achieve real-time performance on mobile GPUs. The participants were provided with the REDS dataset and trained their models to perform efficient 4X video upscaling. The runtime of all models was evaluated on the OPPO Find X2 smartphone with the Snapdragon 865 SoC, which is capable of accelerating floating-point networks on its Adreno GPU. The proposed solutions are fully compatible with any mobile GPU and can upscale videos to HD resolution at up to 80 FPS while demonstrating high-fidelity results. A detailed description of all models developed in the challenge is provided in this paper.
Submitted 17 May, 2021; originally announced May 2021.
Comments: Mobile AI 2021 Workshop and Challenges: https://ai-benchmark.com/workshops/mai/2021/. arXiv admin note: substantial text overlap with arXiv:2105.07825, arXiv:2105.08629, arXiv:2105.07809, arXiv:2105.08630

arXiv:2009.07604 [pdf, other] cs.CV cs.AI
Compressing Facial Makeup Transfer Networks by Collaborative Distillation and Kernel Decomposition
Authors: Bianjiang Yang, Zi Hui, Haoji Hu, Xinyi Hu, Lu Yu
Abstract: Although the facial makeup transfer network has achieved high-quality performance in generating perceptually pleasing makeup images, its capability is still restricted by the massive computation and storage of the network architecture. We address this issue by compressing facial makeup transfer networks with collaborative distillation and kernel decomposition. The main idea of collaborative distillation is underpinned by a finding that the encoder-decoder pairs construct an exclusive collaborative relationship, which is regarded as a new kind of knowledge for low-level vision tasks. For kernel decomposition, we apply the depth-wise separation of convolutional kernels to build a lightweight Convolutional Neural Network (CNN) from the original network. Extensive experiments show the effectiveness of the compression method when applied to the state-of-the-art facial makeup transfer network, BeautyGAN.
Submitted 16 September, 2020; originally announced September 2020.
Comments: This paper will be published at the 2020 IEEE International Conference on Visual Communications and Image Processing (VCIP)
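For reference, the kernel decomposition mentioned in this abstract factors a standard KxK convolution into a depthwise KxK convolution followed by a pointwise 1x1 convolution. A minimal, generic PyTorch sketch of that factorization (not the authors' compressed BeautyGAN layers):

```python
import torch
import torch.nn as nn

class DepthwiseSeparableConv(nn.Module):
    """Depth-wise separation of a KxK kernel: depthwise conv + 1x1 pointwise conv."""
    def __init__(self, in_ch, out_ch, kernel_size=3, padding=1):
        super().__init__()
        # groups=in_ch: each input channel is filtered by its own spatial kernel
        self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size,
                                   padding=padding, groups=in_ch)
        # the 1x1 convolution mixes information across channels
        self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

x = torch.randn(1, 64, 32, 32)
print(DepthwiseSeparableConv(64, 128)(x).shape)  # torch.Size([1, 128, 32, 32])
```

Compared with a dense 3x3 convolution, the factorized layer trades a small accuracy margin for a large reduction in parameters and multiply-accumulate operations, which is the point of the compression above.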
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.07604v1-abstract-full').style.display = 'none'; document.getElementById('2009.07604v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper will be published on 2020 IEEE International Conference on Visual Communications and Image Processing (VCIP)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.02609">arXiv:2002.02609</a> <span> [<a href="https://arxiv.org/pdf/2002.02609">pdf</a>, <a href="https://arxiv.org/format/2002.02609">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Image Fine-grained Inpainting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jie Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiumei Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xinbo Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.02609v2-abstract-short" style="display: inline;"> Image inpainting techniques have shown promising improvement with the assistance of generative adversarial networks (GANs) recently. However, most of them often suffered from completed results with unreasonable structure or blurriness. To mitigate this problem, in this paper, we present a one-stage model that utilizes dense combinations of dilated convolutions to obtain larger and more effective r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.02609v2-abstract-full').style.display = 'inline'; document.getElementById('2002.02609v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.02609v2-abstract-full" style="display: none;"> Image inpainting techniques have shown promising improvement with the assistance of generative adversarial networks (GANs) recently. However, most of them often suffered from completed results with unreasonable structure or blurriness. To mitigate this problem, in this paper, we present a one-stage model that utilizes dense combinations of dilated convolutions to obtain larger and more effective receptive fields. Benefited from the property of this network, we can more easily recover large regions in an incomplete image. To better train this efficient generator, except for frequently-used VGG feature matching loss, we design a novel self-guided regression loss for concentrating on uncertain areas and enhancing the semantic details. 
arXiv:1911.01249 [pdf, other] eess.IV cs.CV
AIM 2019 Challenge on Constrained Super-Resolution: Methods and Results
Authors: Kai Zhang, Shuhang Gu, Radu Timofte, Zheng Hui, Xiumei Wang, Xinbo Gao, Dongliang Xiong, Shuai Liu, Ruipeng Gang, Nan Nan, Chenghua Li, Xueyi Zou, Ning Kang, Zhan Wang, Hang Xu, Chaofeng Wang, Zheng Li, Linlin Wang, Jun Shi, Wenyu Sun, Zhiqiang Lang, Jiangtao Nie, Wei Wei, Lei Zhang, Yazhe Niu, et al. (4 additional authors not shown)
Abstract: This paper reviews the AIM 2019 challenge on constrained example-based single image super-resolution with a focus on the proposed solutions and results. The challenge had 3 tracks. Taking the three main aspects of MSRResNet (number of parameters, inference/running time, and fidelity measured by PSNR) as the baseline, Track 1 aims to reduce the number of parameters while maintaining or improving the running time and the PSNR result, and Tracks 2 and 3 aim to optimize running time and PSNR, respectively, under constraints on the other two aspects. Each track had an average of 64 registered participants, and 12 teams submitted final results. They gauge the state-of-the-art in single image super-resolution.
Submitted 4 November, 2019; originally announced November 2019.
arXiv:1909.11856 [pdf, other] eess.IV cs.CV cs.MM doi: 10.1145/3343031.3351084
Lightweight Image Super-Resolution with Information Multi-distillation Network
Authors: Zheng Hui, Xinbo Gao, Yunchu Yang, Xiumei Wang
Abstract: In recent years, single image super-resolution (SISR) methods using deep convolutional neural networks (CNNs) have achieved impressive results. Thanks to the powerful representation capabilities of deep networks, numerous previous approaches can learn the complex non-linear mapping between low-resolution (LR) image patches and their high-resolution (HR) versions. However, excessive convolutions limit the application of super-resolution technology on low computing power devices. Besides, super-resolution at an arbitrary scale factor is a critical issue in practical applications, which has not been well solved by previous approaches. To address these issues, we propose a lightweight information multi-distillation network (IMDN) by constructing cascaded information multi-distillation blocks (IMDB), which contain distillation and selective fusion parts. Specifically, the distillation module extracts hierarchical features step-by-step, and the fusion module aggregates them according to the importance of candidate features, which is evaluated by the proposed contrast-aware channel attention mechanism. To process real images of any size, we develop an adaptive cropping strategy (ACS) to super-resolve block-wise image patches using the same well-trained model. Extensive experiments suggest that the proposed method performs favorably against the state-of-the-art SR algorithms in terms of visual quality, memory footprint, and inference time. Code is available at https://github.com/Zheng222/IMDN.
Submitted 25 September, 2019; originally announced September 2019.
Comments: To appear in ACM Multimedia 2019, https://github.com/Zheng222/IMDN
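A sketch in the spirit of the contrast-aware channel attention mentioned above: per-channel contrast statistics (standard deviation plus mean, rather than plain average pooling) gate the feature channels. The reduction ratio and layer shapes are illustrative assumptions, not the released IMDN code:

```python
import torch
import torch.nn as nn

class ContrastChannelAttention(nn.Module):
    """Channel gate driven by a per-channel contrast statistic (std + mean)."""
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.gate = nn.Sequential(
            nn.Conv2d(channels, channels // reduction, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, kernel_size=1),
            nn.Sigmoid())

    def forward(self, x):
        # "contrast" summary of each channel over all spatial positions
        mean = x.mean(dim=(2, 3), keepdim=True)
        std = x.std(dim=(2, 3), keepdim=True)
        return x * self.gate(std + mean)

x = torch.randn(2, 64, 48, 48)
print(ContrastChannelAttention(64)(x).shape)   # torch.Size([2, 64, 48, 48])
```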
arXiv:1907.10399 [pdf, other] cs.CV eess.IV
Progressive Perception-Oriented Network for Single Image Super-Resolution
Authors: Zheng Hui, Jie Li, Xinbo Gao, Xiumei Wang
Abstract: Recently, it has been demonstrated that deep neural networks can significantly improve the performance of single image super-resolution (SISR). Numerous studies have concentrated on raising the quantitative quality of super-resolved (SR) images. However, methods that target PSNR maximization usually produce blurred images at large upscaling factors. The introduction of generative adversarial networks (GANs) can mitigate this issue and show impressive results with synthetic high-frequency textures. Nevertheless, these GAN-based approaches tend to add fake textures and even artifacts to make the SR image appear to be of higher visual resolution. In this paper, we propose a novel perceptual image super-resolution method that progressively generates visually high-quality results by constructing a stage-wise network. Specifically, the first phase concentrates on minimizing pixel-wise error, and the second stage utilizes the features extracted by the previous stage to pursue results with better structural retention. The final stage employs fine structure features distilled by the second phase to produce more realistic results. In this way, we can preserve the pixel-level and structural-level information in the perceptual image as much as possible. It is worth noting that the proposed method can build three types of images in one feed-forward process. We also explore a new generator that adopts multi-scale hierarchical feature fusion. Extensive experiments on benchmark datasets show that our approach is superior to the state-of-the-art methods. Code is available at https://github.com/Zheng222/PPON.
Submitted 11 September, 2020; v1 submitted 24 July, 2019; originally announced July 2019.
Comments: Information Sciences 2020
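A toy illustration of the stage-wise idea: one forward pass yields three outputs, with each later stage reusing the features of the previous one. The blocks below are placeholders, not the PPON generator:

```python
import torch
import torch.nn as nn

def block(in_ch, out_ch):
    return nn.Sequential(nn.Conv2d(in_ch, out_ch, 3, padding=1),
                         nn.ReLU(inplace=True))

class ThreeStageNet(nn.Module):
    """Three cascaded stages, each with its own image head, run in one pass."""
    def __init__(self, ch=32):
        super().__init__()
        self.stage1 = block(3, ch)     # pixel/content-oriented stage
        self.stage2 = block(ch, ch)    # structure-oriented stage
        self.stage3 = block(ch, ch)    # perception-oriented stage
        self.heads = nn.ModuleList([nn.Conv2d(ch, 3, 3, padding=1) for _ in range(3)])

    def forward(self, x):
        f1 = self.stage1(x)
        f2 = self.stage2(f1)           # reuses stage-1 features
        f3 = self.stage3(f2)           # reuses stage-2 features
        return [head(f) for head, f in zip(self.heads, (f1, f2, f3))]

outs = ThreeStageNet()(torch.randn(1, 3, 32, 32))
print([tuple(o.shape) for o in outs])  # three images from a single forward pass
```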
arXiv:1811.12481 [pdf, other] cs.CV
Learning to Separate Multiple Illuminants in a Single Image
Authors: Zhuo Hui, Ayan Chakrabarti, Kalyan Sunkavalli, Aswin C. Sankaranarayanan
Abstract: We present a method to separate a single image captured under two illuminants, with different spectra, into the two images corresponding to the appearance of the scene under each individual illuminant. We do this by training a deep neural network to predict the per-pixel reflectance chromaticity of the scene, which we use in conjunction with a previous flash/no-flash image-based separation algorithm to produce the final two output images. We design our reflectance chromaticity network and loss functions by incorporating intuitions from the physics of image formation. We show that this leads to significantly better performance than other single-image techniques and even approaches the quality of the two-image separation method.
Submitted 22 April, 2019; v1 submitted 29 November, 2018; originally announced November 2018.

arXiv:1810.01641 [pdf, other] cs.CV
PIRM Challenge on Perceptual Image Enhancement on Smartphones: Report
Authors: Andrey Ignatov, Radu Timofte, Thang Van Vu, Tung Minh Luu, Trung X Pham, Cao Van Nguyen, Yongwoo Kim, Jae-Seok Choi, Munchurl Kim, Jie Huang, Jiewen Ran, Chen Xing, Xingguang Zhou, Pengfei Zhu, Mingrui Geng, Yawei Li, Eirikur Agustsson, Shuhang Gu, Luc Van Gool, Etienne de Stoutz, Nikolay Kobyshev, Kehui Nie, Yan Zhao, Gen Li, Tong Tong, et al. (23 additional authors not shown)
Abstract: This paper reviews the first challenge on efficient perceptual image enhancement with a focus on deploying deep learning models on smartphones. The challenge consisted of two tracks. In the first one, participants were solving the classical image super-resolution problem with a bicubic downscaling factor of 4. The second track was aimed at real-world photo enhancement, and the goal was to map low-quality photos from the iPhone 3GS device to the same photos captured with a DSLR camera. The target metric used in this challenge combined the runtime, PSNR scores, and solutions' perceptual results measured in a user study. To ensure the efficiency of the submitted models, we additionally measured their runtime and memory requirements on Android smartphones. The proposed solutions significantly improved baseline results, defining the state-of-the-art for image enhancement on smartphones.
Submitted 3 October, 2018; originally announced October 2018.

arXiv:1805.08373 [pdf, other] cs.DC
Speeding-up Age Estimation in Intelligent Demographics System via Network Optimization
Authors: Zhenzhen Hui, Peng Sun, Yonggang Wen
Abstract: Age estimation is a difficult task which requires the automatic detection and interpretation of facial features. Recently, Convolutional Neural Networks (CNNs) have made remarkable improvements in learning age patterns from benchmark datasets. However, for a face "in the wild" (from a video frame or the Internet), the existing algorithms are not as accurate as for a frontal and neutral face. In addition, with the increasing amount of in-the-wild aging data, the computation speed of existing deep learning platforms becomes another crucial issue. In this paper, we propose a highly efficient age estimation system with joint optimization of the age estimation algorithm and the deep learning system. Cooperating with the city surveillance network, this system can provide age group analysis for intelligent demographics. First, we build a three-tier fog computing architecture, including an edge, a fog, and a cloud layer, which processes age estimation directly from raw videos. Second, we optimize the age estimation algorithm based on CNNs with label distribution and K-L divergence distance embedded in the fog layer, and evaluate the model on the latest wild aging dataset. Experimental results demonstrate that: 1. our system collects demographics data dynamically at far distance without contact, and makes the city population analysis automatic; and 2. the age model training has been sped up without losing training progress or model quality. To the best of our knowledge, this is the first intelligent demographics system, with potential applications in improving the efficiency of smart cities and urban living.
Submitted 21 May, 2018; originally announced May 2018.
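A small sketch of the label-distribution formulation mentioned in the last abstract: each training face is labelled with a Gaussian distribution over age bins, and the predicted distribution is fitted with a K-L divergence loss. The bin range and standard deviation below are illustrative assumptions:

```python
import torch
import torch.nn.functional as F

ages = torch.arange(0, 101, dtype=torch.float32)      # age bins 0..100

def age_label_distribution(true_age, sigma=2.0):
    """Gaussian label distribution centred on the true age."""
    d = torch.exp(-0.5 * ((ages - true_age) / sigma) ** 2)
    return d / d.sum()

logits = torch.randn(1, 101)                           # model output for one face
pred_log_prob = F.log_softmax(logits, dim=1)
target = age_label_distribution(true_age=34.0).unsqueeze(0)

# KL(target || prediction): F.kl_div expects log-probabilities as the input
loss = F.kl_div(pred_log_prob, target, reduction="batchmean")
print(loss.item())
```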
arXiv:1803.09454 [pdf, other] cs.CV
Fast and Accurate Single Image Super-Resolution via Information Distillation Network
Authors: Zheng Hui, Xiumei Wang, Xinbo Gao
Abstract: Recently, deep convolutional neural networks (CNNs) have demonstrated remarkable progress on single image super-resolution. However, as the depth and width of the networks increase, CNN-based super-resolution methods have faced challenges of computational complexity and memory consumption in practice. To solve these problems, we propose a deep but compact convolutional network to directly reconstruct the high-resolution image from the original low-resolution image. In general, the proposed model consists of three parts: a feature extraction block, stacked information distillation blocks, and a reconstruction block. By combining an enhancement unit with a compression unit into a distillation block, the local long- and short-path features can be effectively extracted. Specifically, the proposed enhancement unit mixes together two different types of features, and the compression unit distills more useful information for the subsequent blocks. In addition, the proposed network has the advantage of fast execution due to the comparatively small number of filters per layer and the use of group convolution. Experimental results demonstrate that the proposed method is superior to the state-of-the-art methods, especially in terms of time performance.
Submitted 26 March, 2018; originally announced March 2018.
Comments: To appear in CVPR 2018
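A heavily simplified sketch of the enhancement-plus-compression idea: grouped 3x3 convolutions enhance the features, part of the result is retained on a short path while the rest is processed further, and a 1x1 compression unit fuses the two. The channel split, group count, and activation are assumptions for illustration, not the exact IDN configuration:

```python
import torch
import torch.nn as nn

class DistillationBlock(nn.Module):
    """Toy enhancement (grouped convs) + compression (1x1 conv) block."""
    def __init__(self, channels=64, slice_ch=16, groups=4):
        super().__init__()
        self.enhance = nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1, groups=groups),
            nn.LeakyReLU(0.05, inplace=True),
            nn.Conv2d(channels, channels, 3, padding=1, groups=groups),
            nn.LeakyReLU(0.05, inplace=True))
        self.refine = nn.Sequential(
            nn.Conv2d(channels - slice_ch, channels, 3, padding=1),
            nn.LeakyReLU(0.05, inplace=True))
        self.compress = nn.Conv2d(channels + slice_ch, channels, kernel_size=1)
        self.slice_ch = slice_ch

    def forward(self, x):
        feat = self.enhance(x)
        short, rest = feat[:, :self.slice_ch], feat[:, self.slice_ch:]
        long_path = self.refine(rest)                   # long-path features
        mixed = torch.cat([short + x[:, :self.slice_ch], long_path], dim=1)
        return self.compress(mixed) + x                 # 1x1 compression, residual

x = torch.randn(1, 64, 40, 40)
print(DistillationBlock()(x).shape)                     # torch.Size([1, 64, 40, 40])
```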
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.09454v1-abstract-full').style.display = 'none'; document.getElementById('1803.09454v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in CVPR2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1704.05564">arXiv:1704.05564</a> <span> [<a href="https://arxiv.org/pdf/1704.05564">pdf</a>, <a href="https://arxiv.org/format/1704.05564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Illuminant Spectra-based Source Separation Using Flash Photography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhuo Hui</a>, <a href="/search/cs?searchtype=author&query=Sunkavalli%2C+K">Kalyan Sunkavalli</a>, <a href="/search/cs?searchtype=author&query=Hadap%2C+S">Sunil Hadap</a>, <a href="/search/cs?searchtype=author&query=Sankaranarayanan%2C+A+C">Aswin C. Sankaranarayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1704.05564v2-abstract-short" style="display: inline;"> Real-world lighting often consists of multiple illuminants with different spectra. Separating and manipulating these illuminants in post-process is a challenging problem that requires either significant manual input or calibrated scene geometry and lighting. In this work, we leverage a flash/no-flash image pair to analyze and edit scene illuminants based on their spectral differences. We derive a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.05564v2-abstract-full').style.display = 'inline'; document.getElementById('1704.05564v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1704.05564v2-abstract-full" style="display: none;"> Real-world lighting often consists of multiple illuminants with different spectra. Separating and manipulating these illuminants in post-process is a challenging problem that requires either significant manual input or calibrated scene geometry and lighting. In this work, we leverage a flash/no-flash image pair to analyze and edit scene illuminants based on their spectral differences. We derive a novel physics-based relationship between color variations in the observed flash/no-flash intensities and the spectra and surface shading corresponding to individual scene illuminants. Our technique uses this constraint to automatically separate an image into constituent images lit by each illuminant. This separation can be used to support applications like white balancing, lighting editing, and RGB photometric stereo, where we demonstrate results that outperform state-of-the-art techniques on a wide range of images. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.05564v2-abstract-full').style.display = 'none'; document.getElementById('1704.05564v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1603.08039">arXiv:1603.08039</a> <span> [<a href="https://arxiv.org/pdf/1603.08039">pdf</a>, <a href="https://arxiv.org/format/1603.08039">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Study of Dimensional Reduction Techniques for Facial Action Units Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhuo Hui</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+W">Wen-Sheng Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1603.08039v1-abstract-short" style="display: inline;"> Biologically inspired features, such as Gabor filters, result in very high dimensional measurement. Does reducing the dimensionality of the feature space afford advantages beyond computational efficiency? Do some approaches to dimensionality reduction (DR) yield improved action unit detection? To answer these questions, we compared DR approaches in two relatively large databases of spontaneous fac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1603.08039v1-abstract-full').style.display = 'inline'; document.getElementById('1603.08039v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1603.08039v1-abstract-full" style="display: none;"> Biologically inspired features, such as Gabor filters, result in very high dimensional measurement. Does reducing the dimensionality of the feature space afford advantages beyond computational efficiency? Do some approaches to dimensionality reduction (DR) yield improved action unit detection? To answer these questions, we compared DR approaches in two relatively large databases of spontaneous facial behavior (45 participants in total with over 2 minutes of FACS-coded video per participant). Facial features were tracked and aligned using active appearance models (AAM). SIFT and Gabor features were extracted from local facial regions. We compared linear (PCA and KPCA), manifold (LPP and LLE), supervised (LDA and KDA) and hybrid approaches (LSDA) to DR with respect to AU detection. For further comparison, a no-DR control condition was included as well. Linear support vector machine classifiers with independent train and test sets were used for AU detection. AU detection was quantified using area under the ROC curve and F1. Baseline results for PCA with Gabor features were comparable with previous research. With some notable exceptions, DR improved AU detection relative to no-DR. 
arXiv:1512.05278 [pdf, other] cs.CV
Shape and Spatially-Varying Reflectance Estimation From Virtual Exemplars
Authors: Zhuo Hui, Aswin C. Sankaranarayanan
Abstract: This paper addresses the problem of estimating the shape of objects that exhibit spatially-varying reflectance. We assume that multiple images of the object are obtained under a fixed viewpoint and varying illumination, i.e., the setting of photometric stereo. At the core of our technique is the assumption that the BRDF at each pixel lies in the non-negative span of a known BRDF dictionary. This assumption enables a per-pixel surface normal and BRDF estimation framework that is computationally tractable and requires no initialization, in spite of the underlying problem being non-convex. Our estimation framework first solves for the surface normal at each pixel using a variant of example-based photometric stereo. We design an efficient multi-scale search strategy for estimating the surface normal and subsequently refine this estimate using a gradient descent procedure. Given the surface normal estimate, we solve for the spatially-varying BRDF by constraining the BRDF at each pixel to be in the span of the BRDF dictionary; here, we use additional priors to further regularize the solution. A hallmark of our approach is that it requires neither iterative optimization techniques nor careful initialization, both of which are endemic to most state-of-the-art techniques. We showcase the performance of our technique on a wide range of simulated and real scenes where we outperform competing methods.
Submitted 20 September, 2016; v1 submitted 16 December, 2015; originally announced December 2015.
Comments: PAMI minor revision. arXiv admin note: substantial text overlap with arXiv:1503.04265
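Once the dictionary atoms have been rendered for a candidate surface normal and the given lights, the per-pixel constraint above (the BRDF lies in the non-negative span of the dictionary) reduces to a non-negative least-squares fit. A sketch with random placeholder matrices standing in for the rendered dictionary:

```python
import numpy as np
from scipy.optimize import nnls

rng = np.random.default_rng(0)
num_images, num_brdfs = 50, 20
# Each column: intensities a dictionary BRDF would produce at this pixel
# under the 50 lighting conditions (random placeholders here).
D = rng.random((num_images, num_brdfs))
c_true = np.zeros(num_brdfs)
c_true[[2, 7]] = [0.6, 0.4]                  # sparse non-negative mixture
obs = D @ c_true                             # observed per-pixel intensities

c_est, residual = nnls(D, obs)               # non-negative least squares
print(np.round(c_est, 3), residual)
```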
Our estimation framework first solves for the surface normal at each pixel using a variant of example-based photometric stereo. We design an efficient multi-scale search strategy for estimating the surface normal and subsequently, refine this estimate using a gradient descent procedure. Given the surface normal estimate, we solve for the spatially-varying BRDF by constraining the BRDF at each pixel to be in the span of the BRDF dictionary, here, we use additional priors to further regularize the solution. A hallmark of our approach is that it does not require iterative optimization techniques nor the need for careful initialization, both of which are endemic to most state-of-the-art techniques. We showcase the performance of our technique on a wide range of simulated and real scenes where we outperform competing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.05278v3-abstract-full').style.display = 'none'; document.getElementById('1512.05278v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">PAMI minor revision. arXiv admin note: substantial text overlap with arXiv:1503.04265</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1503.04265">arXiv:1503.04265</a> <span> [<a href="https://arxiv.org/pdf/1503.04265">pdf</a>, <a href="https://arxiv.org/format/1503.04265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Dictionary-based Approach for Estimating Shape and Spatially-Varying Reflectance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhuo Hui</a>, <a href="/search/cs?searchtype=author&query=Sankaranarayanan%2C+A+C">Aswin C. Sankaranarayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1503.04265v1-abstract-short" style="display: inline;"> We present a technique for estimating the shape and reflectance of an object in terms of its surface normals and spatially-varying BRDF. We assume that multiple images of the object are obtained under fixed view-point and varying illumination, i.e, the setting of photometric stereo. 
Assuming that the BRDF at each pixel lies in the non-negative span of a known BRDF dictionary, we derive a per-pixel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.04265v1-abstract-full').style.display = 'inline'; document.getElementById('1503.04265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1503.04265v1-abstract-full" style="display: none;"> We present a technique for estimating the shape and reflectance of an object in terms of its surface normals and spatially-varying BRDF. We assume that multiple images of the object are obtained under fixed view-point and varying illumination, i.e, the setting of photometric stereo. Assuming that the BRDF at each pixel lies in the non-negative span of a known BRDF dictionary, we derive a per-pixel surface normal and BRDF estimation framework that requires neither iterative optimization techniques nor careful initialization, both of which are endemic to most state-of-the-art techniques. We showcase the performance of our technique on a wide range of simulated and real scenes where we outperform competing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.04265v1-abstract-full').style.display = 'none'; document.getElementById('1503.04265v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Intl. Conf. Computational Photography, 2015</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1409.6092">arXiv:1409.6092</a> <span> [<a href="https://arxiv.org/pdf/1409.6092">pdf</a>, <a href="https://arxiv.org/ps/1409.6092">ps</a>, <a href="https://arxiv.org/format/1409.6092">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> Optimal Ternary Constant-Composition Codes with Weight Four and Distance Six </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hengjia%2C+W">Wei Hengjia</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zhang Hui</a>, <a href="/search/cs?searchtype=author&query=Mingzhi%2C+Z">Zhu Mingzhi</a>, <a href="/search/cs?searchtype=author&query=Gennian%2C+G">Ge Gennian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1409.6092v2-abstract-short" style="display: inline;"> The sizes of optimal constant-composition codes of weight three have been determined by Chee, Ge and Ling with four cases in doubt. Group divisible codes played an important role in their constructions. In this paper, we study the problem of constructing optimal ternary constant-composition codes with Hamming weight four and minimum distance six. 
arXiv:1409.6092 (https://arxiv.org/abs/1409.6092) [cs.IT, math.CO]
Title: Optimal Ternary Constant-Composition Codes with Weight Four and Distance Six
Authors: Wei Hengjia, Zhang Hui, Zhu Mingzhi, Ge Gennian
Abstract: The sizes of optimal constant-composition codes of weight three have been determined by Chee, Ge and Ling, with four cases in doubt. Group divisible codes played an important role in their constructions. In this paper, we study the problem of constructing optimal ternary constant-composition codes with Hamming weight four and minimum distance six. The problem is solved with a small number of lengths undetermined. The previously known results are those with code length no greater than 10.
Submitted: 2 October 2014 (v1 submitted 22 September 2014; originally announced September 2014).
Comments: 44 pages.
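As a small, hedged aid to the definitions in this abstract (not material from the paper), the sketch below checks whether a given ternary code is constant-composition with Hamming weight four and minimum distance six; the three-word code it tests is a toy with pairwise-disjoint supports, included only to exercise the checker and in no way an optimal code.

```python
# Hedged illustration of the objects named in the abstract: a checker for a
# ternary constant-composition code with weight 4 and minimum distance 6.
from collections import Counter
from itertools import combinations

def hamming(u, v):
    """Number of coordinates where two words differ."""
    return sum(a != b for a, b in zip(u, v))

def check_ccc(code, weight=4, min_dist=6, alphabet=frozenset({0, 1, 2})):
    """Verify alphabet, constant composition, Hamming weight, and minimum distance."""
    assert all(set(w) <= alphabet for w in code), "not ternary"
    compositions = {tuple(sorted(Counter(w).items())) for w in code}
    assert len(compositions) == 1, "not constant-composition"
    assert all(sum(x != 0 for x in w) == weight for w in code), "wrong weight"
    assert all(hamming(u, v) >= min_dist for u, v in combinations(code, 2)), "distance too small"
    return True

# Toy code of length 12: disjoint supports give pairwise distance 8 >= 6.
toy_code = [
    (1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0),
    (0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0),
    (0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2),
]
print(check_ccc(toy_code))  # True
```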
class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>