CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;44 of 44 results for author: <span class="mathjax">Cui, R</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Cui%2C+R">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cui, R"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cui%2C+R&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cui, R"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17392">arXiv:2411.17392</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.17392">pdf</a>, <a href="https://arxiv.org/format/2411.17392">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NumGrad-Pull: Numerical Gradient Guided Tri-plane Representation for Surface Reconstruction from Point Clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+S">Shi Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Anwar%2C+S">Saeed Anwar</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17392v1-abstract-short" style="display: inline;"> Reconstructing continuous surfaces from unoriented and unordered 3D points is a fundamental challenge in computer vision and graphics. Recent advancements address this problem by training neural signed distance functions to pull 3D location queries to their closest points on a surface, following the predicted signed distances and the analytical gradients computed by the network. In this paper, we&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17392v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17392v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17392v1-abstract-full" style="display: none;"> Reconstructing continuous surfaces from unoriented and unordered 3D points is a fundamental challenge in computer vision and graphics. Recent advancements address this problem by training neural signed distance functions to pull 3D location queries to their closest points on a surface, following the predicted signed distances and the analytical gradients computed by the network. In this paper, we introduce NumGrad-Pull, leveraging the representation capability of tri-plane structures to accelerate the learning of signed distance functions and enhance the fidelity of local details in surface reconstruction. To further improve the training stability of grid-based tri-planes, we propose to exploit numerical gradients, replacing conventional analytical computations. Additionally, we present a progressive plane expansion strategy to facilitate faster signed distance function convergence and design a data sampling strategy to mitigate reconstruction artifacts. Our extensive experiments across a variety of benchmarks demonstrate the effectiveness and robustness of our approach. Code is available at https://github.com/CuiRuikai/NumGrad-Pull <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17392v1-abstract-full').style.display = 'none'; document.getElementById('2411.17392v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07444">arXiv:2408.07444</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.07444">pdf</a>, <a href="https://arxiv.org/format/2408.07444">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Costal Cartilage Segmentation with Topology Guided Deformable Mamba: Method and Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senmao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+H">Haifan Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Runmeng Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+B">Boyao Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yicheng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Z">Zhonglin Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Haiqing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jingyang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+B">Bo Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+L">Lin Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+H">Haiyue Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07444v1-abstract-short" style="display: inline;"> Costal cartilage segmentation is crucial to various medical applications, necessitating precise and reliable techniques due to its complex anatomy and the importance of accurate diagnosis and surgical planning. We propose a novel deep learning-based approach called topology-guided deformable Mamba (TGDM) for costal cartilage segmentation. The TGDM is tailored to capture the intricate long-range co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07444v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07444v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07444v1-abstract-full" style="display: none;"> Costal cartilage segmentation is crucial to various medical applications, necessitating precise and reliable techniques due to its complex anatomy and the importance of accurate diagnosis and surgical planning. We propose a novel deep learning-based approach called topology-guided deformable Mamba (TGDM) for costal cartilage segmentation. The TGDM is tailored to capture the intricate long-range costal cartilage relationships. Our method leverages a deformable model that integrates topological priors to enhance the adaptability and accuracy of the segmentation process. Furthermore, we developed a comprehensive benchmark that contains 165 cases for costal cartilage segmentation. This benchmark sets a new standard for evaluating costal cartilage segmentation techniques and provides a valuable resource for future research. Extensive experiments conducted on both in-domain benchmarks and out-of domain test sets demonstrate the superiority of our approach over existing methods, showing significant improvements in segmentation precision and robustness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07444v1-abstract-full').style.display = 'none'; document.getElementById('2408.07444v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12857">arXiv:2407.12857</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.12857">pdf</a>, <a href="https://arxiv.org/format/2407.12857">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Automated Peer Reviewing in Paper SEA: Standardization, Evaluation, and Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jianxiang Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Z">Zichen Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+J">Jiaqi Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+K">Kangyang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Weng%2C+Z">Zhenmin Weng</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chenghua Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+L">Long Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Renjing Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+C">Chengcheng Han</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Q">Qiushi Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhiyong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+Y">Yunshi Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12857v2-abstract-short" style="display: inline;"> In recent years, the rapid increase in scientific papers has overwhelmed traditional review mechanisms, resulting in varying quality of publications. Although existing methods have explored the capabilities of Large Language Models (LLMs) for automated scientific reviewing, their generated contents are often generic or partial. To address the issues above, we introduce an automated paper reviewing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12857v2-abstract-full').style.display = 'inline'; document.getElementById('2407.12857v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12857v2-abstract-full" style="display: none;"> In recent years, the rapid increase in scientific papers has overwhelmed traditional review mechanisms, resulting in varying quality of publications. Although existing methods have explored the capabilities of Large Language Models (LLMs) for automated scientific reviewing, their generated contents are often generic or partial. To address the issues above, we introduce an automated paper reviewing framework SEA. It comprises of three modules: Standardization, Evaluation, and Analysis, which are represented by models SEA-S, SEA-E, and SEA-A, respectively. Initially, SEA-S distills data standardization capabilities of GPT-4 for integrating multiple reviews for a paper. Then, SEA-E utilizes standardized data for fine-tuning, enabling it to generate constructive reviews. Finally, SEA-A introduces a new evaluation metric called mismatch score to assess the consistency between paper contents and reviews. Moreover, we design a self-correction strategy to enhance the consistency. Extensive experimental results on datasets collected from eight venues show that SEA can generate valuable insights for authors to improve their papers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12857v2-abstract-full').style.display = 'none'; document.getElementById('2407.12857v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06177">arXiv:2407.06177</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.06177">pdf</a>, <a href="https://arxiv.org/format/2407.06177">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Vision-Language Models under Cultural and Inclusive Considerations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Karamolegkou%2C+A">Antonia Karamolegkou</a>, <a href="/search/cs?searchtype=author&amp;query=Rust%2C+P">Phillip Rust</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yong Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%B8gaard%2C+A">Anders S酶gaard</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06177v1-abstract-short" style="display: inline;"> Large vision-language models (VLMs) can assist visually impaired people by describing images from their daily lives. Current evaluation datasets may not reflect diverse cultural user backgrounds or the situational context of this use case. To address this problem, we create a survey to determine caption preferences and propose a culture-centric evaluation benchmark by filtering VizWiz, an existing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06177v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06177v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06177v1-abstract-full" style="display: none;"> Large vision-language models (VLMs) can assist visually impaired people by describing images from their daily lives. Current evaluation datasets may not reflect diverse cultural user backgrounds or the situational context of this use case. To address this problem, we create a survey to determine caption preferences and propose a culture-centric evaluation benchmark by filtering VizWiz, an existing dataset with images taken by people who are blind. We then evaluate several VLMs, investigating their reliability as visual assistants in a culturally diverse setting. While our results for state-of-the-art models are promising, we identify challenges such as hallucination and misalignment of automatic evaluation metrics with human judgment. We make our survey, data, code, and model outputs publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06177v1-abstract-full').style.display = 'none'; document.getElementById('2407.06177v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">HuCLLM @ ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17858">arXiv:2406.17858</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.17858">pdf</a>, <a href="https://arxiv.org/format/2406.17858">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Depth-Driven Geometric Prompt Learning for Laparoscopic Liver Landmark Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pei%2C+J">Jialun Pei</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruize Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yaoqian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Si%2C+W">Weixin Si</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+J">Jing Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Heng%2C+P">Pheng-Ann Heng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17858v2-abstract-short" style="display: inline;"> Laparoscopic liver surgery poses a complex intraoperative dynamic environment for surgeons, where remains a significant challenge to distinguish critical or even hidden structures inside the liver. Liver anatomical landmarks, e.g., ridge and ligament, serve as important markers for 2D-3D alignment, which can significantly enhance the spatial perception of surgeons for precise surgery. To facilitat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17858v2-abstract-full').style.display = 'inline'; document.getElementById('2406.17858v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17858v2-abstract-full" style="display: none;"> Laparoscopic liver surgery poses a complex intraoperative dynamic environment for surgeons, where remains a significant challenge to distinguish critical or even hidden structures inside the liver. Liver anatomical landmarks, e.g., ridge and ligament, serve as important markers for 2D-3D alignment, which can significantly enhance the spatial perception of surgeons for precise surgery. To facilitate the detection of laparoscopic liver landmarks, we collect a novel dataset called L3D, which comprises 1,152 frames with elaborated landmark annotations from surgical videos of 39 patients across two medical sites. For benchmarking purposes, 12 mainstream detection methods are selected and comprehensively evaluated on L3D. Further, we propose a depth-driven geometric prompt learning network, namely D2GPLand. Specifically, we design a Depth-aware Prompt Embedding (DPE) module that is guided by self-supervised prompts and generates semantically relevant geometric information with the benefit of global depth cues extracted from SAM-based features. Additionally, a Semantic-specific Geometric Augmentation (SGA) scheme is introduced to efficiently merge RGB-D spatial and geometric information through reverse anatomic perception. The experimental results indicate that D2GPLand obtains state-of-the-art performance on L3D, with 63.52% DICE and 48.68% IoU scores. Together with 2D-3D fusion technology, our method can directly provide the surgeon with intuitive guidance information in laparoscopic scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17858v2-abstract-full').style.display = 'none'; document.getElementById('2406.17858v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by MICCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02310">arXiv:2406.02310</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.02310">pdf</a>, <a href="https://arxiv.org/format/2406.02310">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Disentangled Representation via Variational AutoEncoder for Continuous Treatment Effect Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruijing Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jianbin Sun</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+B">Bingyu He</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Kewei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+B">Bingfeng Ge</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02310v1-abstract-short" style="display: inline;"> Continuous treatment effect estimation holds significant practical importance across various decision-making and assessment domains, such as healthcare and the military. However, current methods for estimating dose-response curves hinge on balancing the entire representation by treating all covariates as confounding variables. Although various approaches disentangle covariates into different facto&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02310v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02310v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02310v1-abstract-full" style="display: none;"> Continuous treatment effect estimation holds significant practical importance across various decision-making and assessment domains, such as healthcare and the military. However, current methods for estimating dose-response curves hinge on balancing the entire representation by treating all covariates as confounding variables. Although various approaches disentangle covariates into different factors for treatment effect estimation, they are confined to binary treatment settings. Moreover, observational data are often tainted with non-causal noise information that is imperceptible to the human. Hence, in this paper, we propose a novel Dose-Response curve estimator via Variational AutoEncoder (DRVAE) disentangled covariates representation. Our model is dedicated to disentangling covariates into instrumental factors, confounding factors, adjustment factors, and external noise factors, thereby facilitating the estimation of treatment effects under continuous treatment settings by balancing the disentangled confounding factors. Extensive results on synthetic and semi-synthetic datasets demonstrate that our model outperforms the current state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02310v1-abstract-full').style.display = 'none'; document.getElementById('2406.02310v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.15622">arXiv:2405.15622</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.15622">pdf</a>, <a href="https://arxiv.org/format/2405.15622">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LAM3D: Large Image-Point-Cloud Alignment Model for 3D Reconstruction from Single Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+X">Xibin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weixuan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senbo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weizhe Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Shenzhou Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+T">Taizhang Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongdong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.15622v1-abstract-short" style="display: inline;"> Large Reconstruction Models have made significant strides in the realm of automated 3D content generation from single or multiple input images. Despite their success, these models often produce 3D meshes with geometric inaccuracies, stemming from the inherent challenges of deducing 3D shapes solely from image data. In this work, we introduce a novel framework, the Large Image and Point Cloud Align&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15622v1-abstract-full').style.display = 'inline'; document.getElementById('2405.15622v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.15622v1-abstract-full" style="display: none;"> Large Reconstruction Models have made significant strides in the realm of automated 3D content generation from single or multiple input images. Despite their success, these models often produce 3D meshes with geometric inaccuracies, stemming from the inherent challenges of deducing 3D shapes solely from image data. In this work, we introduce a novel framework, the Large Image and Point Cloud Alignment Model (LAM3D), which utilizes 3D point cloud data to enhance the fidelity of generated 3D meshes. Our methodology begins with the development of a point-cloud-based network that effectively generates precise and meaningful latent tri-planes, laying the groundwork for accurate 3D mesh reconstruction. Building upon this, our Image-Point-Cloud Feature Alignment technique processes a single input image, aligning to the latent tri-planes to imbue image features with robust 3D information. This process not only enriches the image features but also facilitates the production of high-fidelity 3D meshes without the need for multi-view input, significantly reducing geometric distortions. Our approach achieves state-of-the-art high-fidelity 3D mesh reconstruction from a single image in just 6 seconds, and experiments on various datasets demonstrate its effectiveness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15622v1-abstract-full').style.display = 'none'; document.getElementById('2405.15622v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18241">arXiv:2403.18241</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18241">pdf</a>, <a href="https://arxiv.org/format/2403.18241">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NeuSDFusion: A Spatial-Aware Generative Model for 3D Shape Completion, Reconstruction, and Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weizhe Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weixuan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senbo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+T">Taizhang Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+X">Xibin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+H">Han Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhennan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Shenzhou Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongdong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18241v2-abstract-short" style="display: inline;"> 3D shape generation aims to produce innovative 3D content adhering to specific conditions and constraints. Existing methods often decompose 3D shapes into a sequence of localized components, treating each element in isolation without considering spatial consistency. As a result, these approaches exhibit limited versatility in 3D data representation and shape generation, hindering their ability to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18241v2-abstract-full').style.display = 'inline'; document.getElementById('2403.18241v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18241v2-abstract-full" style="display: none;"> 3D shape generation aims to produce innovative 3D content adhering to specific conditions and constraints. Existing methods often decompose 3D shapes into a sequence of localized components, treating each element in isolation without considering spatial consistency. As a result, these approaches exhibit limited versatility in 3D data representation and shape generation, hindering their ability to generate highly diverse 3D shapes that comply with the specified constraints. In this paper, we introduce a novel spatial-aware 3D shape generation framework that leverages 2D plane representations for enhanced 3D shape modeling. To ensure spatial coherence and reduce memory usage, we incorporate a hybrid shape representation technique that directly learns a continuous signed distance field representation of the 3D shape using orthogonal 2D planes. Additionally, we meticulously enforce spatial correspondences across distinct planes using a transformer-based autoencoder structure, promoting the preservation of spatial relationships in the generated 3D shapes. This yields an algorithm that consistently outperforms state-of-the-art 3D shape generation methods on various tasks, including unconditional shape generation, multi-modal shape completion, single-view reconstruction, and text-to-shape synthesis. Our project page is available at https://weizheliu.github.io/NeuSDFusion/ . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18241v2-abstract-full').style.display = 'none'; document.getElementById('2403.18241v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024, project page: https://weizheliu.github.io/NeuSDFusion/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01893">arXiv:2402.01893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.01893">pdf</a>, <a href="https://arxiv.org/format/2402.01893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3687956">10.1145/3687956 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Surface Reconstruction Using Rotation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruiqi Cui</a>, <a href="/search/cs?searchtype=author&amp;query=G%C3%A6de%2C+E+T">Emil Toftegaard G忙de</a>, <a href="/search/cs?searchtype=author&amp;query=Rotenberg%2C+E">Eva Rotenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Kobbelt%2C+L">Leif Kobbelt</a>, <a href="/search/cs?searchtype=author&amp;query=B%C3%A6rentzen%2C+J+A">J. Andreas B忙rentzen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01893v2-abstract-short" style="display: inline;"> Inspired by the seminal result that a graph and an associated rotation system uniquely determine the topology of a closed manifold, we propose a combinatorial method for reconstruction of surfaces from points. Our method constructs a spanning tree and a rotation system. Since the tree is trivially a planar graph, its rotation system determines a genus zero surface with a single face which we proce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01893v2-abstract-full').style.display = 'inline'; document.getElementById('2402.01893v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01893v2-abstract-full" style="display: none;"> Inspired by the seminal result that a graph and an associated rotation system uniquely determine the topology of a closed manifold, we propose a combinatorial method for reconstruction of surfaces from points. Our method constructs a spanning tree and a rotation system. Since the tree is trivially a planar graph, its rotation system determines a genus zero surface with a single face which we proceed to incrementally refine by inserting edges to split faces and thus merging them. In order to raise the genus, special handles are added by inserting edges between different faces and thus merging them. We apply our method to a wide range of input point clouds in order to investigate its effectiveness, and we compare our method to several other surface reconstruction methods. We find that our method offers better control over outlier classification, i.e. which points to include in the reconstructed surface, and also more control over the topology of the reconstructed surface. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01893v2-abstract-full').style.display = 'none'; document.getElementById('2402.01893v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACM Trans. Graph. 43, 6, Article 190 (December 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.17053">arXiv:2401.17053</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.17053">pdf</a>, <a href="https://arxiv.org/format/2401.17053">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> BlockFusion: Expandable 3D Scene Generation using Latent Tri-plane Extrapolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhennan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+H">Han Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+T">Taizhang Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weixuan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senbo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weizhe Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Sato%2C+H">Hiroyuki Sato</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongdong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.17053v4-abstract-short" style="display: inline;"> We present BlockFusion, a diffusion-based model that generates 3D scenes as unit blocks and seamlessly incorporates new blocks to extend the scene. BlockFusion is trained using datasets of 3D blocks that are randomly cropped from complete 3D scene meshes. Through per-block fitting, all training blocks are converted into the hybrid neural fields: with a tri-plane containing the geometry features, f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17053v4-abstract-full').style.display = 'inline'; document.getElementById('2401.17053v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.17053v4-abstract-full" style="display: none;"> We present BlockFusion, a diffusion-based model that generates 3D scenes as unit blocks and seamlessly incorporates new blocks to extend the scene. BlockFusion is trained using datasets of 3D blocks that are randomly cropped from complete 3D scene meshes. Through per-block fitting, all training blocks are converted into the hybrid neural fields: with a tri-plane containing the geometry features, followed by a Multi-layer Perceptron (MLP) for decoding the signed distance values. A variational auto-encoder is employed to compress the tri-planes into the latent tri-plane space, on which the denoising diffusion process is performed. Diffusion applied to the latent representations allows for high-quality and diverse 3D scene generation. To expand a scene during generation, one needs only to append empty blocks to overlap with the current scene and extrapolate existing latent tri-planes to populate new blocks. The extrapolation is done by conditioning the generation process with the feature samples from the overlapping tri-planes during the denoising iterations. Latent tri-plane extrapolation produces semantically and geometrically meaningful transitions that harmoniously blend with the existing scene. A 2D layout conditioning mechanism is used to control the placement and arrangement of scene elements. Experimental results indicate that BlockFusion is capable of generating diverse, geometrically consistent and unbounded large 3D scenes with unprecedented high-quality shapes in both indoor and outdoor scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17053v4-abstract-full').style.display = 'none'; document.getElementById('2401.17053v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACM Transactions on Graphics (SIGGRAPH&#39;24). Code: https://yang-l1.github.io/blockfusion</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04975">arXiv:2401.04975</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.04975">pdf</a>, <a href="https://arxiv.org/format/2401.04975">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HaltingVT: Adaptive Token Halting Transformer for Efficient Video Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Q">Qian Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruoxuan Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuke Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Haoqi Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04975v1-abstract-short" style="display: inline;"> Action recognition in videos poses a challenge due to its high computational cost, especially for Joint Space-Time video transformers (Joint VT). Despite their effectiveness, the excessive number of tokens in such architectures significantly limits their efficiency. In this paper, we propose HaltingVT, an efficient video transformer adaptively removing redundant video patch tokens, which is primar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04975v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04975v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04975v1-abstract-full" style="display: none;"> Action recognition in videos poses a challenge due to its high computational cost, especially for Joint Space-Time video transformers (Joint VT). Despite their effectiveness, the excessive number of tokens in such architectures significantly limits their efficiency. In this paper, we propose HaltingVT, an efficient video transformer adaptively removing redundant video patch tokens, which is primarily composed of a Joint VT and a Glimpser module. Specifically, HaltingVT applies data-adaptive token reduction at each layer, resulting in a significant reduction in the overall computational cost. Besides, the Glimpser module quickly removes redundant tokens in shallow transformer layers, which may even be misleading for video recognition tasks based on our observations. To further encourage HaltingVT to focus on the key motion-related information in videos, we design an effective Motion Loss during training. HaltingVT acquires video analysis capabilities and token halting compression strategies simultaneously in a unified training process, without requiring additional training procedures or sub-networks. On the Mini-Kinetics dataset, we achieved 75.0% top-1 ACC with 24.2 GFLOPs, as well as 67.2% top-1 ACC with an extremely low 9.9 GFLOPs. The code is available at https://github.com/dun-research/HaltingVT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04975v1-abstract-full').style.display = 'none'; document.getElementById('2401.04975v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17353">arXiv:2310.17353</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.17353">pdf</a>, <a href="https://arxiv.org/format/2310.17353">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Cultural Adaptation of Recipes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yong Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Kementchedjhieva%2C+Y">Yova Kementchedjhieva</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Karamolegkou%2C+A">Antonia Karamolegkou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+L">Li Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Dare%2C+M">Megan Dare</a>, <a href="/search/cs?searchtype=author&amp;query=Donatelli%2C+L">Lucia Donatelli</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.17353v1-abstract-short" style="display: inline;"> Building upon the considerable advances in Large Language Models (LLMs), we are now equipped to address more sophisticated tasks demanding a nuanced understanding of cross-cultural contexts. A key example is recipe adaptation, which goes beyond simple translation to include a grasp of ingredients, culinary techniques, and dietary preferences specific to a given culture. We introduce a new task inv&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17353v1-abstract-full').style.display = 'inline'; document.getElementById('2310.17353v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17353v1-abstract-full" style="display: none;"> Building upon the considerable advances in Large Language Models (LLMs), we are now equipped to address more sophisticated tasks demanding a nuanced understanding of cross-cultural contexts. A key example is recipe adaptation, which goes beyond simple translation to include a grasp of ingredients, culinary techniques, and dietary preferences specific to a given culture. We introduce a new task involving the translation and cultural adaptation of recipes between Chinese and English-speaking cuisines. To support this investigation, we present CulturalRecipes, a unique dataset comprised of automatically paired recipes written in Mandarin Chinese and English. This dataset is further enriched with a human-written and curated test set. In this intricate task of cross-cultural recipe adaptation, we evaluate the performance of various methods, including GPT-4 and other LLMs, traditional machine translation, and information retrieval techniques. Our comprehensive analysis includes both automatic and human evaluation metrics. While GPT-4 exhibits impressive abilities in adapting Chinese recipes into English, it still lags behind human expertise when translating English recipes into Chinese. This underscores the multifaceted nature of cultural adaptations. We anticipate that these insights will significantly contribute to future research on culturally-aware language models and their practical application in culturally diverse contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17353v1-abstract-full').style.display = 'none'; document.getElementById('2310.17353v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to TACL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06235">arXiv:2308.06235</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.06235">pdf</a>, <a href="https://arxiv.org/format/2308.06235">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> KETM:A Knowledge-Enhanced Text Matching method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+K">Kexin Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yahui Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+G">Guozhe Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenguo Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Rongyi Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06235v1-abstract-short" style="display: inline;"> Text matching is the task of matching two texts and determining the relationship between them, which has extensive applications in natural language processing tasks such as reading comprehension, and Question-Answering systems. The mainstream approach is to compute text representations or to interact with the text through attention mechanism, which is effective in text matching tasks. However, the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06235v1-abstract-full').style.display = 'inline'; document.getElementById('2308.06235v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06235v1-abstract-full" style="display: none;"> Text matching is the task of matching two texts and determining the relationship between them, which has extensive applications in natural language processing tasks such as reading comprehension, and Question-Answering systems. The mainstream approach is to compute text representations or to interact with the text through attention mechanism, which is effective in text matching tasks. However, the performance of these models is insufficient for texts that require commonsense knowledge-based reasoning. To this end, in this paper, We introduce a new model for text matching called the Knowledge Enhanced Text Matching model (KETM), to enrich contextual representations with real-world common-sense knowledge from external knowledge sources to enhance our model understanding and reasoning. First, we use Wiktionary to retrieve the text word definitions as our external knowledge. Secondly, we feed text and knowledge to the text matching module to extract their feature vectors. The text matching module is used as an interaction module by integrating the encoder layer, the co-attention layer, and the aggregation layer. Specifically, the interaction process is iterated several times to obtain in-depth interaction information and extract the feature vectors of text and knowledge by multi-angle pooling. Then, we fuse text and knowledge using a gating mechanism to learn the ratio of text and knowledge fusion by a neural network that prevents noise generated by knowledge. After that, experimental validation on four datasets are carried out, and the experimental results show that our proposed model performs well on all four datasets, and the performance of our method is improved compared to the base model without adding external knowledge, which validates the effectiveness of our proposed method. The code is available at https://github.com/1094701018/KETM <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06235v1-abstract-full').style.display = 'none'; document.getElementById('2308.06235v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IJCNN 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.05426">arXiv:2308.05426</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.05426">pdf</a>, <a href="https://arxiv.org/ps/2308.05426">ps</a>, <a href="https://arxiv.org/format/2308.05426">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Low Rank Adaptation of Segment Anything to Salient Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+S">Siyuan He</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+S">Shi Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.05426v1-abstract-short" style="display: inline;"> Foundation models, such as OpenAI&#39;s GPT-3 and GPT-4, Meta&#39;s LLaMA, and Google&#39;s PaLM2, have revolutionized the field of artificial intelligence. A notable paradigm shift has been the advent of the Segment Anything Model (SAM), which has exhibited a remarkable capability to segment real-world objects, trained on 1 billion masks and 11 million images. Although SAM excels in general object segmentati&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05426v1-abstract-full').style.display = 'inline'; document.getElementById('2308.05426v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.05426v1-abstract-full" style="display: none;"> Foundation models, such as OpenAI&#39;s GPT-3 and GPT-4, Meta&#39;s LLaMA, and Google&#39;s PaLM2, have revolutionized the field of artificial intelligence. A notable paradigm shift has been the advent of the Segment Anything Model (SAM), which has exhibited a remarkable capability to segment real-world objects, trained on 1 billion masks and 11 million images. Although SAM excels in general object segmentation, it lacks the intrinsic ability to detect salient objects, resulting in suboptimal performance in this domain. To address this challenge, we present the Segment Salient Object Model (SSOM), an innovative approach that adaptively fine-tunes SAM for salient object detection by harnessing the low-rank structure inherent in deep learning. Comprehensive qualitative and quantitative evaluations across five challenging RGB benchmark datasets demonstrate the superior performance of our approach, surpassing state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05426v1-abstract-full').style.display = 'none'; document.getElementById('2308.05426v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 0 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.14726">arXiv:2307.14726</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.14726">pdf</a>, <a href="https://arxiv.org/format/2307.14726">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/iccv51070.2023.01320">10.1109/iccv51070.2023.01320 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> P2C: Self-Supervised Point Cloud Completion from Single Partial Clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+S">Shi Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Anwar%2C+S">Saeed Anwar</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+C">Chaoyue Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.14726v1-abstract-short" style="display: inline;"> Point cloud completion aims to recover the complete shape based on a partial observation. Existing methods require either complete point clouds or multiple partial observations of the same object for learning. In contrast to previous approaches, we present Partial2Complete (P2C), the first self-supervised framework that completes point cloud objects using training samples consisting of only a sing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14726v1-abstract-full').style.display = 'inline'; document.getElementById('2307.14726v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.14726v1-abstract-full" style="display: none;"> Point cloud completion aims to recover the complete shape based on a partial observation. Existing methods require either complete point clouds or multiple partial observations of the same object for learning. In contrast to previous approaches, we present Partial2Complete (P2C), the first self-supervised framework that completes point cloud objects using training samples consisting of only a single incomplete point cloud per object. Specifically, our framework groups incomplete point clouds into local patches as input and predicts masked patches by learning prior information from different partial objects. We also propose Region-Aware Chamfer Distance to regularize shape mismatch without limiting completion capability, and devise the Normal Consistency Constraint to incorporate a local planarity assumption, encouraging the recovered shape surface to be continuous and complete. In this way, P2C no longer needs multiple observations or complete point clouds as ground truth. Instead, structural cues are learned from a category-specific dataset to complete partial point clouds of objects. We demonstrate the effectiveness of our approach on both synthetic ShapeNet data and real-world ScanNet data, showing that P2C produces comparable results to methods trained with complete shapes, and outperforms methods learned with multiple partial observations. Code is available at https://github.com/CuiRuikai/Partial2Complete. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14726v1-abstract-full').style.display = 'none'; document.getElementById('2307.14726v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICCV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.13539">arXiv:2307.13539</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.13539">pdf</a>, <a href="https://arxiv.org/format/2307.13539">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Model Calibration in Dense Classification with Adaptive Label Perturbation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+C">Changkun Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kaihao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.13539v2-abstract-short" style="display: inline;"> For safety-related applications, it is crucial to produce trustworthy deep neural networks whose prediction is associated with confidence that can represent the likelihood of correctness for subsequent decision-making. Existing dense binary classification models are prone to being over-confident. To improve model calibration, we propose Adaptive Stochastic Label Perturbation (ASLP) which learns a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13539v2-abstract-full').style.display = 'inline'; document.getElementById('2307.13539v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.13539v2-abstract-full" style="display: none;"> For safety-related applications, it is crucial to produce trustworthy deep neural networks whose prediction is associated with confidence that can represent the likelihood of correctness for subsequent decision-making. Existing dense binary classification models are prone to being over-confident. To improve model calibration, we propose Adaptive Stochastic Label Perturbation (ASLP) which learns a unique label perturbation level for each training image. ASLP employs our proposed Self-Calibrating Binary Cross Entropy (SC-BCE) loss, which unifies label perturbation processes including stochastic approaches (like DisturbLabel), and label smoothing, to correct calibration while maintaining classification rates. ASLP follows Maximum Entropy Inference of classic statistical mechanics to maximise prediction entropy with respect to missing information. It performs this while: (1) preserving classification accuracy on known data as a conservative solution, or (2) specifically improves model calibration degree by minimising the gap between the prediction accuracy and expected confidence of the target training label. Extensive results demonstrate that ASLP can significantly improve calibration degrees of dense binary classification models on both in-distribution and out-of-distribution data. The code is available on https://github.com/Carlisle-Liu/ASLP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13539v2-abstract-full').style.display = 'none'; document.getElementById('2307.13539v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICCV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.19597">arXiv:2305.19597</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.19597">pdf</a>, <a href="https://arxiv.org/format/2305.19597">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> What does the Failure to Reason with &#34;Respectively&#34; in Zero/Few-Shot Settings Tell Us about Language Models? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seolhwa Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%B8gaard%2C+A">Anders S酶gaard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.19597v1-abstract-short" style="display: inline;"> Humans can effortlessly understand the coordinate structure of sentences such as &#34;Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, respectively&#34;. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a cont&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19597v1-abstract-full').style.display = 'inline'; document.getElementById('2305.19597v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.19597v1-abstract-full" style="display: none;"> Humans can effortlessly understand the coordinate structure of sentences such as &#34;Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, respectively&#34;. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of &#34;respectively&#34;. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19597v1-abstract-full').style.display = 'none'; document.getElementById('2305.19597v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06364">arXiv:2304.06364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.06364">pdf</a>, <a href="https://arxiv.org/format/2304.06364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AGIEval: A Human-Centric Benchmark for Evaluating Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+W">Wanjun Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yiduo Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Y">Yaobo Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+S">Shuai Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanlin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Saied%2C+A">Amin Saied</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Weizhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Duan%2C+N">Nan Duan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06364v2-abstract-short" style="display: inline;"> Evaluating the general abilities of foundation models to tackle human-level tasks is a vital aspect of their development and application in the pursuit of Artificial General Intelligence (AGI). Traditional benchmarks, which rely on artificial datasets, may not accurately represent human-level capabilities. In this paper, we introduce AGIEval, a novel benchmark specifically designed to assess found&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06364v2-abstract-full').style.display = 'inline'; document.getElementById('2304.06364v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06364v2-abstract-full" style="display: none;"> Evaluating the general abilities of foundation models to tackle human-level tasks is a vital aspect of their development and application in the pursuit of Artificial General Intelligence (AGI). Traditional benchmarks, which rely on artificial datasets, may not accurately represent human-level capabilities. In this paper, we introduce AGIEval, a novel benchmark specifically designed to assess foundation model in the context of human-centric standardized exams, such as college entrance exams, law school admission tests, math competitions, and lawyer qualification tests. We evaluate several state-of-the-art foundation models, including GPT-4, ChatGPT, and Text-Davinci-003, using this benchmark. Impressively, GPT-4 surpasses average human performance on SAT, LSAT, and math competitions, attaining a 95% accuracy rate on the SAT Math test and a 92.5% accuracy on the English test of the Chinese national college entrance exam. This demonstrates the extraordinary performance of contemporary foundation models. In contrast, we also find that GPT-4 is less proficient in tasks that require complex reasoning or specific domain knowledge. Our comprehensive analyses of model capabilities (understanding, knowledge, reasoning, and calculation) reveal these models&#39; strengths and limitations, providing valuable insights into future directions for enhancing their general capabilities. By concentrating on tasks pertinent to human cognition and decision-making, our benchmark delivers a more meaningful and robust evaluation of foundation models&#39; performance in real-world scenarios. The data, code, and all model outputs are released in https://github.com/ruixiangcui/AGIEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06364v2-abstract-full').style.display = 'none'; document.getElementById('2304.06364v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.06820">arXiv:2211.06820</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.06820">pdf</a>, <a href="https://arxiv.org/format/2211.06820">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Energy-Based Residual Latent Transport for Unsupervised Point Cloud Completion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+S">Shi Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Anwar%2C+S">Saeed Anwar</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.06820v1-abstract-short" style="display: inline;"> Unsupervised point cloud completion aims to infer the whole geometry of a partial object observation without requiring partial-complete correspondence. Differing from existing deterministic approaches, we advocate generative modeling based unsupervised point cloud completion to explore the missing correspondence. Specifically, we propose a novel framework that performs completion by transforming a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06820v1-abstract-full').style.display = 'inline'; document.getElementById('2211.06820v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.06820v1-abstract-full" style="display: none;"> Unsupervised point cloud completion aims to infer the whole geometry of a partial object observation without requiring partial-complete correspondence. Differing from existing deterministic approaches, we advocate generative modeling based unsupervised point cloud completion to explore the missing correspondence. Specifically, we propose a novel framework that performs completion by transforming a partial shape encoding into a complete one using a latent transport module, and it is designed as a latent-space energy-based model (EBM) in an encoder-decoder architecture, aiming to learn a probability distribution conditioned on the partial shape encoding. To train the latent code transport module and the encoder-decoder network jointly, we introduce a residual sampling strategy, where the residual captures the domain gap between partial and complete shape latent spaces. As a generative model-based framework, our method can produce uncertainty maps consistent with human perception, leading to explainable unsupervised point cloud completion. We experimentally show that the proposed method produces high-fidelity completion results, outperforming state-of-the-art models by a significant margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06820v1-abstract-full').style.display = 'none'; document.getElementById('2211.06820v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BMVC 2022 paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.02081">arXiv:2210.02081</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.02081">pdf</a>, <a href="https://arxiv.org/format/2210.02081">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Locate before Answering: Answer Guided Question Localization for Video Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qian%2C+T">Tianwen Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ran Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jingjing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+P">Pai Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+X">Xiaowei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yu-Gang Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.02081v2-abstract-short" style="display: inline;"> Video question answering (VideoQA) is an essential task in vision-language understanding, which has attracted numerous research attention recently. Nevertheless, existing works mostly achieve promising performances on short videos of duration within 15 seconds. For VideoQA on minute-level long-term videos, those methods are likely to fail because of lacking the ability to deal with noise and redun&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02081v2-abstract-full').style.display = 'inline'; document.getElementById('2210.02081v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.02081v2-abstract-full" style="display: none;"> Video question answering (VideoQA) is an essential task in vision-language understanding, which has attracted numerous research attention recently. Nevertheless, existing works mostly achieve promising performances on short videos of duration within 15 seconds. For VideoQA on minute-level long-term videos, those methods are likely to fail because of lacking the ability to deal with noise and redundancy caused by scene changes and multiple actions in the video. Considering the fact that the question often remains concentrated in a short temporal range, we propose to first locate the question to a segment in the video and then infer the answer using the located segment only. Under this scheme, we propose &#34;Locate before Answering&#34; (LocAns), a novel approach that integrates a question locator and an answer predictor into an end-to-end model. During the training phase, the available answer label not only serves as the supervision signal of the answer predictor, but also is used to generate pseudo temporal labels for the question locator. Moreover, we design a decoupled alternative training strategy to update the two modules separately. In the experiments, LocAns achieves state-of-the-art performance on two modern long-term VideoQA datasets NExT-QA and ActivityNet-QA, and its qualitative examples show the reliable performance of the question localization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02081v2-abstract-full').style.display = 'none'; document.getElementById('2210.02081v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.09668">arXiv:2208.09668</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.09668">pdf</a>, <a href="https://arxiv.org/format/2208.09668">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Generalised Co-Salient Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruikai Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kaihao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Weihao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+N">Nick Barnes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.09668v3-abstract-short" style="display: inline;"> We propose a new setting that relaxes an assumption in the conventional Co-Salient Object Detection (CoSOD) setting by allowing the presence of &#34;noisy images&#34; which do not show the shared co-salient object. We call this new setting Generalised Co-Salient Object Detection (GCoSOD). We propose a novel random sampling based Generalised CoSOD Training (GCT) strategy to distill the awareness of inter-i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09668v3-abstract-full').style.display = 'inline'; document.getElementById('2208.09668v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.09668v3-abstract-full" style="display: none;"> We propose a new setting that relaxes an assumption in the conventional Co-Salient Object Detection (CoSOD) setting by allowing the presence of &#34;noisy images&#34; which do not show the shared co-salient object. We call this new setting Generalised Co-Salient Object Detection (GCoSOD). We propose a novel random sampling based Generalised CoSOD Training (GCT) strategy to distill the awareness of inter-image absence of co-salient objects into CoSOD models. It employs a Diverse Sampling Self-Supervised Learning (DS3L) that, in addition to the provided supervised co-salient label, introduces additional self-supervised labels for noisy images (being null, that no co-salient object is present). Further, the random sampling process inherent in GCT enables the generation of a high-quality uncertainty map highlighting potential false-positive predictions at instance level. To evaluate the performance of CoSOD models under the GCoSOD setting, we propose two new testing datasets, namely CoCA-Common and CoCA-Zero, where a common salient object is partially present in the former and completely absent in the latter. Extensive experiments demonstrate that our proposed method significantly improves the performance of CoSOD models in terms of the performance under the GCoSOD setting as well as the model calibration degrees. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09668v3-abstract-full').style.display = 'none'; document.getElementById('2208.09668v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.10615">arXiv:2204.10615</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.10615">pdf</a>, <a href="https://arxiv.org/format/2204.10615">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> Generalized Quantifiers as a Source of Error in Multilingual NLU Benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%B8gaard%2C+A">Anders S酶gaard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.10615v2-abstract-short" style="display: inline;"> Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today&#39;s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that qua&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.10615v2-abstract-full').style.display = 'inline'; document.getElementById('2204.10615v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.10615v2-abstract-full" style="display: none;"> Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today&#39;s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that quantifiers are pervasive in NLU benchmarks, and their occurrence at test time is associated with performance drops. Multilingual models also exhibit unsatisfying quantifier reasoning abilities, but not necessarily worse for non-English languages. To facilitate directly-targeted probing, we present an adversarial generalized quantifier NLI task (GQNLI) and show that pre-trained language models have a clear lack of robustness in generalized quantifier reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.10615v2-abstract-full').style.display = 'none'; document.getElementById('2204.10615v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.10281">arXiv:2204.10281</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.10281">pdf</a>, <a href="https://arxiv.org/format/2204.10281">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> How Conservative are Language Models? Adapting to the Introduction of Gender-Neutral Pronouns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Brandl%2C+S">Stephanie Brandl</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%B8gaard%2C+A">Anders S酶gaard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.10281v2-abstract-short" style="display: inline;"> Gender-neutral pronouns have recently been introduced in many languages to a) include non-binary people and b) as a generic singular. Recent results from psycholinguistics suggest that gender-neutral pronouns (in Swedish) are not associated with human processing difficulties. This, we show, is in sharp contrast with automated processing. We show that gender-neutral pronouns in Danish, English, and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.10281v2-abstract-full').style.display = 'inline'; document.getElementById('2204.10281v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.10281v2-abstract-full" style="display: none;"> Gender-neutral pronouns have recently been introduced in many languages to a) include non-binary people and b) as a generic singular. Recent results from psycholinguistics suggest that gender-neutral pronouns (in Swedish) are not associated with human processing difficulties. This, we show, is in sharp contrast with automated processing. We show that gender-neutral pronouns in Danish, English, and Swedish are associated with higher perplexity, more dispersed attention patterns, and worse downstream performance. We argue that such conservativity in language models may limit widespread adoption of gender-neutral pronouns and must therefore be resolved. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.10281v2-abstract-full').style.display = 'none'; document.getElementById('2204.10281v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.09409">arXiv:2204.09409</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.09409">pdf</a>, <a href="https://arxiv.org/format/2204.09409">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3477495.3532078">10.1145/3477495.3532078 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Video Moment Retrieval from Text Queries via Single Frame Annotation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ran Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+T">Tianwen Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+P">Pai Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Daskalaki%2C+E">Elena Daskalaki</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jingjing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+X">Xiaowei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Huyang Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yu-Gang Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.09409v3-abstract-short" style="display: inline;"> Video moment retrieval aims at finding the start and end timestamps of a moment (part of a video) described by a given natural language query. Fully supervised methods need complete temporal boundary annotations to achieve promising results, which is costly since the annotator needs to watch the whole moment. Weakly supervised methods only rely on the paired video and query, but the performance is&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09409v3-abstract-full').style.display = 'inline'; document.getElementById('2204.09409v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.09409v3-abstract-full" style="display: none;"> Video moment retrieval aims at finding the start and end timestamps of a moment (part of a video) described by a given natural language query. Fully supervised methods need complete temporal boundary annotations to achieve promising results, which is costly since the annotator needs to watch the whole moment. Weakly supervised methods only rely on the paired video and query, but the performance is relatively poor. In this paper, we look closer into the annotation process and propose a new paradigm called &#34;glance annotation&#34;. This paradigm requires the timestamp of only one single random frame, which we refer to as a &#34;glance&#34;, within the temporal boundary of the fully supervised counterpart. We argue this is beneficial because comparing to weak supervision, trivial cost is added yet more potential in performance is provided. Under the glance annotation setting, we propose a method named as Video moment retrieval via Glance Annotation (ViGA) based on contrastive learning. ViGA cuts the input video into clips and contrasts between clips and queries, in which glance guided Gaussian distributed weights are assigned to all clips. Our extensive experiments indicate that ViGA achieves better results than the state-of-the-art weakly supervised methods by a large margin, even comparable to fully supervised methods in some cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09409v3-abstract-full').style.display = 'none'; document.getElementById('2204.09409v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as full paper in SIGIR 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.10482">arXiv:2203.10482</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.10482">pdf</a>, <a href="https://arxiv.org/ps/2203.10482">ps</a>, <a href="https://arxiv.org/format/2203.10482">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DEIM: An effective deep encoding and interaction model for sentence matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+K">Kexin Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yahui Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Rongyi Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenguo Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.10482v1-abstract-short" style="display: inline;"> Natural language sentence matching is the task of comparing two sentences and identifying the relationship between them.It has a wide range of applications in natural language processing tasks such as reading comprehension, question and answer systems. The main approach is to compute the interaction between text representations and sentence pairs through an attention mechanism, which can extract t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10482v1-abstract-full').style.display = 'inline'; document.getElementById('2203.10482v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.10482v1-abstract-full" style="display: none;"> Natural language sentence matching is the task of comparing two sentences and identifying the relationship between them.It has a wide range of applications in natural language processing tasks such as reading comprehension, question and answer systems. The main approach is to compute the interaction between text representations and sentence pairs through an attention mechanism, which can extract the semantic information between sentence pairs well. However,this kind of method can not gain satisfactory results when dealing with complex semantic features. To solve this problem, we propose a sentence matching method based on deep encoding and interaction to extract deep semantic information. In the encoder layer,we refer to the information of another sentence in the process of encoding a single sentence, and later use a heuristic algorithm to fuse the information. In the interaction layer, we use a bidirectional attention mechanism and a self-attention mechanism to obtain deep semantic information.Finally, we perform a pooling operation and input it to the MLP for classification. we evaluate our model on three tasks: recognizing textual entailment, paraphrase recognition, and answer selection. We conducted experiments on the SNLI and SciTail datasets for the recognizing textual entailment task, the Quora dataset for the paraphrase recognition task, and the WikiQA dataset for the answer selection task. The experimental results show that the proposed algorithm can effectively extract deep semantic features that verify the effectiveness of the algorithm on sentence matching tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10482v1-abstract-full').style.display = 'none'; document.getElementById('2203.10482v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.10020">arXiv:2203.10020</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.10020">pdf</a>, <a href="https://arxiv.org/format/2203.10020">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Challenges and Strategies in Cross-Cultural NLP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a>, <a href="/search/cs?searchtype=author&amp;query=Frank%2C+S">Stella Frank</a>, <a href="/search/cs?searchtype=author&amp;query=Lent%2C+H">Heather Lent</a>, <a href="/search/cs?searchtype=author&amp;query=de+Lhoneux%2C+M">Miryam de Lhoneux</a>, <a href="/search/cs?searchtype=author&amp;query=Abdou%2C+M">Mostafa Abdou</a>, <a href="/search/cs?searchtype=author&amp;query=Brandl%2C+S">Stephanie Brandl</a>, <a href="/search/cs?searchtype=author&amp;query=Bugliarello%2C+E">Emanuele Bugliarello</a>, <a href="/search/cs?searchtype=author&amp;query=Piqueras%2C+L+C">Laura Cabello Piqueras</a>, <a href="/search/cs?searchtype=author&amp;query=Chalkidis%2C+I">Ilias Chalkidis</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Fierro%2C+C">Constanza Fierro</a>, <a href="/search/cs?searchtype=author&amp;query=Margatina%2C+K">Katerina Margatina</a>, <a href="/search/cs?searchtype=author&amp;query=Rust%2C+P">Phillip Rust</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%B8gaard%2C+A">Anders S酶gaard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.10020v1-abstract-short" style="display: inline;"> Various efforts in the Natural Language Processing (NLP) community have been made to accommodate linguistic diversity and serve speakers of many different languages. However, it is important to acknowledge that speakers and the content they produce and require, vary not just by language, but also by culture. Although language and culture are tightly linked, there are important differences. Analogo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10020v1-abstract-full').style.display = 'inline'; document.getElementById('2203.10020v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.10020v1-abstract-full" style="display: none;"> Various efforts in the Natural Language Processing (NLP) community have been made to accommodate linguistic diversity and serve speakers of many different languages. However, it is important to acknowledge that speakers and the content they produce and require, vary not just by language, but also by culture. Although language and culture are tightly linked, there are important differences. Analogous to cross-lingual and multilingual NLP, cross-cultural and multicultural NLP considers these differences in order to better serve users of NLP systems. We propose a principled framework to frame these efforts, and survey existing and potential strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10020v1-abstract-full').style.display = 'none'; document.getElementById('2203.10020v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2022 - Theme track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.08888">arXiv:2111.08888</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.08888">pdf</a>, <a href="https://arxiv.org/format/2111.08888">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Random Graph-Based Neuromorphic Learning with a Layer-Weaken Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mao%2C+R">Ruiqi Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Rongxin Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.08888v2-abstract-short" style="display: inline;"> Unified understanding of neuro networks (NNs) gets the users into great trouble because they have been puzzled by what kind of rules should be obeyed to optimize the internal structure of NNs. Considering the potential capability of random graphs to alter how computation is performed, we demonstrate that they can serve as architecture generators to optimize the internal structure of NNs. To transf&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08888v2-abstract-full').style.display = 'inline'; document.getElementById('2111.08888v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.08888v2-abstract-full" style="display: none;"> Unified understanding of neuro networks (NNs) gets the users into great trouble because they have been puzzled by what kind of rules should be obeyed to optimize the internal structure of NNs. Considering the potential capability of random graphs to alter how computation is performed, we demonstrate that they can serve as architecture generators to optimize the internal structure of NNs. To transform the random graph theory into an NN model with practical meaning and based on clarifying the input-output relationship of each neuron, we complete data feature mapping by calculating Fourier Random Features (FRFs). Under the usage of this low-operation cost approach, neurons are assigned to several groups of which connection relationships can be regarded as uniform representations of random graphs they belong to, and random arrangement fuses those neurons to establish the pattern matrix, markedly reducing manual participation and computational cost without the fixed and deep architecture. Leveraging this single neuromorphic learning model termed random graph-based neuro network (RGNN) we develop a joint classification mechanism involving information interaction between multiple RGNNs and realize significant performance improvements in supervised learning for three benchmark tasks, whereby they effectively avoid the adverse impact of the interpretability of NNs on the structure design and engineering practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08888v2-abstract-full').style.display = 'none'; document.getElementById('2111.08888v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.09885">arXiv:2108.09885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.09885">pdf</a>, <a href="https://arxiv.org/ps/2108.09885">ps</a>, <a href="https://arxiv.org/format/2108.09885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-85896-4_34">10.1007/978-3-030-85896-4_34 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DTWSSE: Data Augmentation with a Siamese Encoder for Time Series </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xinyu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xinlan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenguo Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yahui Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Rongyi Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.09885v1-abstract-short" style="display: inline;"> Access to labeled time series data is often limited in the real world, which constrains the performance of deep learning models in the field of time series analysis. Data augmentation is an effective way to solve the problem of small sample size and imbalance in time series datasets. The two key factors of data augmentation are the distance metric and the choice of interpolation method. SMOTE does&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.09885v1-abstract-full').style.display = 'inline'; document.getElementById('2108.09885v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.09885v1-abstract-full" style="display: none;"> Access to labeled time series data is often limited in the real world, which constrains the performance of deep learning models in the field of time series analysis. Data augmentation is an effective way to solve the problem of small sample size and imbalance in time series datasets. The two key factors of data augmentation are the distance metric and the choice of interpolation method. SMOTE does not perform well on time series data because it uses a Euclidean distance metric and interpolates directly on the object. Therefore, we propose a DTW-based synthetic minority oversampling technique using siamese encoder for interpolation named DTWSSE. In order to reasonably measure the distance of the time series, DTW, which has been verified to be an effective method forts, is employed as the distance metric. To adapt the DTW metric, we use an autoencoder trained in an unsupervised self-training manner for interpolation. The encoder is a Siamese Neural Network for mapping the time series data from the DTW hidden space to the Euclidean deep feature space, and the decoder is used to map the deep feature space back to the DTW hidden space. We validate the proposed methods on a number of different balanced or unbalanced time series datasets. Experimental results show that the proposed method can lead to better performance of the downstream deep learning model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.09885v1-abstract-full').style.display = 'none'; document.getElementById('2108.09885v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as full research paper in APWEB-WAIM 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.03509">arXiv:2108.03509</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.03509">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Compositional Generalization in Multilingual Semantic Parsing over Wikidata </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Aralikatte%2C+R">Rahul Aralikatte</a>, <a href="/search/cs?searchtype=author&amp;query=Lent%2C+H">Heather Lent</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.03509v2-abstract-short" style="display: inline;"> Semantic parsing (SP) allows humans to leverage vast knowledge resources through natural interaction. However, parsers are mostly designed for and evaluated on English resources, such as CFQ (Keysers et al., 2020), the current standard benchmark based on English data generated from grammar rules and oriented towards Freebase, an outdated knowledge base. We propose a method for creating a multiling&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.03509v2-abstract-full').style.display = 'inline'; document.getElementById('2108.03509v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.03509v2-abstract-full" style="display: none;"> Semantic parsing (SP) allows humans to leverage vast knowledge resources through natural interaction. However, parsers are mostly designed for and evaluated on English resources, such as CFQ (Keysers et al., 2020), the current standard benchmark based on English data generated from grammar rules and oriented towards Freebase, an outdated knowledge base. We propose a method for creating a multilingual, parallel dataset of question-query pairs, grounded in Wikidata. We introduce such a dataset, which we call Multilingual Compositional Wikidata Questions (MCWQ), and use it to analyze the compositional generalization of semantic parsers in Hebrew, Kannada, Chinese and English. While within-language generalization is comparable across languages, experiments on zero-shot cross-lingual transfer demonstrate that cross-lingual compositional generalization fails, even with state-of-the-art pretrained multilingual encoders. Furthermore, our methodology, dataset and results will facilitate future research on SP in more realistic and diverse settings than has been possible with existing resources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.03509v2-abstract-full').style.display = 'none'; document.getElementById('2108.03509v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to TACL; Authors&#39; final version, pre-MIT Press publication; Previous title: Multilingual Compositional Wikidata Questions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.07364">arXiv:2106.07364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.07364">pdf</a>, <a href="https://arxiv.org/ps/2106.07364">ps</a>, <a href="https://arxiv.org/format/2106.07364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Meaning Representation of Numeric Fused-Heads in UCCA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.07364v1-abstract-short" style="display: inline;"> We exhibit that the implicit UCCA parser does not address numeric fused-heads (NFHs) consistently, which could result either from inconsistent annotation, insufficient training data or a modelling limitation. and show which factors are involved. We consider this phenomenon important, as it is pervasive in text and critical for correct inference. Careful design and fine-grained annotation of NFHs i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07364v1-abstract-full').style.display = 'inline'; document.getElementById('2106.07364v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.07364v1-abstract-full" style="display: none;"> We exhibit that the implicit UCCA parser does not address numeric fused-heads (NFHs) consistently, which could result either from inconsistent annotation, insufficient training data or a modelling limitation. and show which factors are involved. We consider this phenomenon important, as it is pervasive in text and critical for correct inference. Careful design and fine-grained annotation of NFHs in meaning representation frameworks would benefit downstream tasks such as machine translation, natural language inference and question answering, particularly when they require numeric reasoning, as recovering and categorizing them. We are investigating the treatment of this phenomenon by other meaning representations, such as AMR. We encourage researchers in meaning representations, and computational linguistics in general, to address this phenomenon in future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07364v1-abstract-full').style.display = 'none'; document.getElementById('2106.07364v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">UnImplicit Workshop at ACL 2021 (abstract)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.02561">arXiv:2106.02561</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.02561">pdf</a>, <a href="https://arxiv.org/ps/2106.02561">ps</a>, <a href="https://arxiv.org/format/2106.02561">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Great Service! Fine-grained Parsing of Implicit Arguments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.02561v2-abstract-short" style="display: inline;"> Broad-coverage meaning representations in NLP mostly focus on explicitly expressed content. More importantly, the scarcity of datasets annotating diverse implicit roles limits empirical studies into their linguistic nuances. For example, in the web review &#34;Great service!&#34;, the provider and consumer are implicit arguments of different types. We examine an annotated corpus of fine-grained implicit a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02561v2-abstract-full').style.display = 'inline'; document.getElementById('2106.02561v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.02561v2-abstract-full" style="display: none;"> Broad-coverage meaning representations in NLP mostly focus on explicitly expressed content. More importantly, the scarcity of datasets annotating diverse implicit roles limits empirical studies into their linguistic nuances. For example, in the web review &#34;Great service!&#34;, the provider and consumer are implicit arguments of different types. We examine an annotated corpus of fine-grained implicit arguments (Cui and Hershcovich, 2020) by carefully re-annotating it, resolving several inconsistencies. Subsequently, we present the first transition-based neural parser that can handle implicit arguments dynamically, and experiment with two different transition systems on the improved dataset. We find that certain types of implicit arguments are more difficult to parse than others and that the simpler system is more accurate in recovering implicit arguments, despite having a lower overall parsing score, attesting current reasoning limitations of NLP models. This work will facilitate a better understanding of implicit and underspecified language, by incorporating it holistically into meaning representations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02561v2-abstract-full').style.display = 'none'; document.getElementById('2106.02561v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IWPT 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.05710">arXiv:2010.05710</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.05710">pdf</a>, <a href="https://arxiv.org/format/2010.05710">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> HUJI-KU at MRP~2020: Two Transition-based Neural Parsers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Arviv%2C+O">Ofir Arviv</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.05710v1-abstract-short" style="display: inline;"> This paper describes the HUJI-KU system submission to the shared task on Cross-Framework Meaning Representation Parsing (MRP) at the 2020 Conference for Computational Language Learning (CoNLL), employing TUPA and the HIT-SCIR parser, which were, respectively, the baseline system and winning system in the 2019 MRP shared task. Both are transition-based parsers using BERT contextualized embeddings.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05710v1-abstract-full').style.display = 'inline'; document.getElementById('2010.05710v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.05710v1-abstract-full" style="display: none;"> This paper describes the HUJI-KU system submission to the shared task on Cross-Framework Meaning Representation Parsing (MRP) at the 2020 Conference for Computational Language Learning (CoNLL), employing TUPA and the HIT-SCIR parser, which were, respectively, the baseline system and winning system in the 2019 MRP shared task. Both are transition-based parsers using BERT contextualized embeddings. We generalized TUPA to support the newly-added MRP frameworks and languages, and experimented with multitask learning with the HIT-SCIR parser. We reached 4th place in both the cross-framework and cross-lingual tracks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05710v1-abstract-full').style.display = 'none'; document.getElementById('2010.05710v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.13120">arXiv:2009.13120</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2009.13120">pdf</a>, <a href="https://arxiv.org/format/2009.13120">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1049/ipr2.12419">10.1049/ipr2.12419 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Medical Image Segmentation Using Deep Learning: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Risheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lei%2C+T">Tao Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixia Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+B">Bingtao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Hongying Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Nandi%2C+A+K">Asoke K. Nandi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.13120v3-abstract-short" style="display: inline;"> Deep learning has been widely used for medical image segmentation and a large number of papers has been presented recording the success of deep learning in the field. In this paper, we present a comprehensive thematic survey on medical image segmentation using deep learning techniques. This paper makes two original contributions. Firstly, compared to traditional surveys that directly divide litera&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13120v3-abstract-full').style.display = 'inline'; document.getElementById('2009.13120v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.13120v3-abstract-full" style="display: none;"> Deep learning has been widely used for medical image segmentation and a large number of papers has been presented recording the success of deep learning in the field. In this paper, we present a comprehensive thematic survey on medical image segmentation using deep learning techniques. This paper makes two original contributions. Firstly, compared to traditional surveys that directly divide literatures of deep learning on medical image segmentation into many groups and introduce literatures in detail for each group, we classify currently popular literatures according to a multi-level structure from coarse to fine. Secondly, this paper focuses on supervised and weakly supervised learning approaches, without including unsupervised approaches since they have been introduced in many old surveys and they are not popular currently. For supervised learning approaches, we analyze literatures in three aspects: the selection of backbone networks, the design of network blocks, and the improvement of loss functions. For weakly supervised learning approaches, we investigate literature according to data augmentation, transfer learning, and interactive segmentation, separately. Compared to existing surveys, this survey classifies the literatures very differently from before and is more convenient for readers to understand the relevant rationale and will guide them to think of appropriate improvements in medical image segmentation based on deep learning approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13120v3-abstract-full').style.display = 'none'; document.getElementById('2009.13120v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.10194">arXiv:2006.10194</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2006.10194">pdf</a>, <a href="https://arxiv.org/format/2006.10194">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Economics">econ.GN</span> </div> </div> <p class="title is-5 mathjax"> Gender Inequality in Research Productivity During the COVID-19 Pandemic </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruomeng Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+H">Hao Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+F">Feng Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.10194v5-abstract-short" style="display: inline;"> We study the disproportionate impact of the lockdown as a result of the COVID-19 outbreak on female and male academics&#39; research productivity in social science. The lockdown has caused substantial disruptions to academic activities, requiring people to work from home. How this disruption affects productivity and the related gender equity is an important operations and societal question. We collect&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.10194v5-abstract-full').style.display = 'inline'; document.getElementById('2006.10194v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.10194v5-abstract-full" style="display: none;"> We study the disproportionate impact of the lockdown as a result of the COVID-19 outbreak on female and male academics&#39; research productivity in social science. The lockdown has caused substantial disruptions to academic activities, requiring people to work from home. How this disruption affects productivity and the related gender equity is an important operations and societal question. We collect data from the largest open-access preprint repository for social science on 41,858 research preprints in 18 disciplines produced by 76,832 authors across 25 countries over a span of two years. We use a difference-in-differences approach leveraging the exogenous pandemic shock. Our results indicate that, in the 10 weeks after the lockdown in the United States, although the total research productivity increased by 35%, female academics&#39; productivity dropped by 13.9% relative to that of male academics. We also show that several disciplines drive such gender inequality. Finally, we find that this intensified productivity gap is more pronounced for academics in top-ranked universities, and the effect exists in six other countries. Our work points out the fairness issue in productivity caused by the lockdown, a finding that universities will find helpful when evaluating faculty productivity. It also helps organizations realize the potential unintended consequences that can arise from telecommuting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.10194v5-abstract-full').style.display = 'none'; document.getElementById('2006.10194v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.12889">arXiv:2005.12889</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2005.12889">pdf</a>, <a href="https://arxiv.org/format/2005.12889">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Refining Implicit Argument Annotation for UCCA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruixiang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.12889v4-abstract-short" style="display: inline;"> Predicate-argument structure analysis is a central component in meaning representations of text. The fact that some arguments are not explicitly mentioned in a sentence gives rise to ambiguity in language understanding, and renders it difficult for machines to interpret text correctly. However, only few resources represent implicit roles for NLU, and existing studies in NLP only make coarse distin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.12889v4-abstract-full').style.display = 'inline'; document.getElementById('2005.12889v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.12889v4-abstract-full" style="display: none;"> Predicate-argument structure analysis is a central component in meaning representations of text. The fact that some arguments are not explicitly mentioned in a sentence gives rise to ambiguity in language understanding, and renders it difficult for machines to interpret text correctly. However, only few resources represent implicit roles for NLU, and existing studies in NLP only make coarse distinctions between categories of arguments omitted from linguistic form. This paper proposes a typology for fine-grained implicit argument annotation on top of Universal Conceptual Cognitive Annotation&#39;s foundational layer. The proposed implicit argument categorisation is driven by theories of implicit role interpretation and consists of six types: Deictic, Generic, Genre-based, Type-identifiable, Non-specific, and Iterated-set. We exemplify our design by revisiting part of the UCCA EWT corpus, providing a new dataset annotated with the refinement layer, and making a comparative analysis with other schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.12889v4-abstract-full').style.display = 'none'; document.getElementById('2005.12889v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">DMR 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.12446">arXiv:1910.12446</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.12446">pdf</a>, <a href="https://arxiv.org/format/1910.12446">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards Successful Social Media Advertising: Predicting the Influence of Commercial Tweets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Renhao Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+G">Gagan Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Ramnath%2C+R">Rajiv Ramnath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.12446v1-abstract-short" style="display: inline;"> Businesses communicate using Twitter for a variety of reasons -- to raise awareness of their brands, to market new products, to respond to community comments, and to connect with their customers and potential customers in a targeted manner. For businesses to do this effectively, they need to understand which content and structural elements about a tweet make it influential, that is, widely liked,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.12446v1-abstract-full').style.display = 'inline'; document.getElementById('1910.12446v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.12446v1-abstract-full" style="display: none;"> Businesses communicate using Twitter for a variety of reasons -- to raise awareness of their brands, to market new products, to respond to community comments, and to connect with their customers and potential customers in a targeted manner. For businesses to do this effectively, they need to understand which content and structural elements about a tweet make it influential, that is, widely liked, followed, and retweeted. This paper presents a systematic methodology for analyzing commercial tweets, and predicting the influence on their readers. Our model, which use a combination of decoration and meta features, outperforms the prediction ability of the baseline model as well as the tweet embedding model. Further, in order to demonstrate a practical use of this work, we show how an unsuccessful tweet may be engineered (for example, reworded) to increase its potential for success. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.12446v1-abstract-full').style.display = 'none'; document.getElementById('1910.12446v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.11399">arXiv:1910.11399</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.11399">pdf</a>, <a href="https://arxiv.org/ps/1910.11399">ps</a>, <a href="https://arxiv.org/format/1910.11399">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Comparison of Quality Indicators in User-generated Content Using Social Media and Scholarly Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Das%2C+M">Manirupa Das</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Renhao Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.11399v1-abstract-short" style="display: inline;"> Predicting the quality of a text document is a critical task when presented with the problem of measuring the performance of a document before its release. In this work, we evaluate various features including those extracted from the text content (textual) and those describing higher-level characteristics of the text (meta) features that are not directly available from the text, and show how these&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11399v1-abstract-full').style.display = 'inline'; document.getElementById('1910.11399v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.11399v1-abstract-full" style="display: none;"> Predicting the quality of a text document is a critical task when presented with the problem of measuring the performance of a document before its release. In this work, we evaluate various features including those extracted from the text content (textual) and those describing higher-level characteristics of the text (meta) features that are not directly available from the text, and show how these features inform prediction of document quality in different ways. Moreover, we also compare our methods on both social user-generated data such as tweets, and scholarly user-generated data such as academic articles, showing how the same features differently influence prediction of quality across these disparate domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11399v1-abstract-full').style.display = 'none'; document.getElementById('1910.11399v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.04957">arXiv:1910.04957</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.04957">pdf</a>, <a href="https://arxiv.org/format/1910.04957">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> SoK: Hardware Security Support for Trustworthy Execution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+L">Lianying Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Shuang%2C+H">He Shuang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+S">Shengjie Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Rongzhen Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Bettadpur%2C+P">Pushkar Bettadpur</a>, <a href="/search/cs?searchtype=author&amp;query=Lie%2C+D">David Lie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.04957v1-abstract-short" style="display: inline;"> In recent years, there have emerged many new hardware mechanisms for improving the security of our computer systems. Hardware offers many advantages over pure software approaches: immutability of mechanisms to software attacks, better execution and power efficiency and a smaller interface allowing it to better maintain secrets. This has given birth to a plethora of hardware mechanisms providing tr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.04957v1-abstract-full').style.display = 'inline'; document.getElementById('1910.04957v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.04957v1-abstract-full" style="display: none;"> In recent years, there have emerged many new hardware mechanisms for improving the security of our computer systems. Hardware offers many advantages over pure software approaches: immutability of mechanisms to software attacks, better execution and power efficiency and a smaller interface allowing it to better maintain secrets. This has given birth to a plethora of hardware mechanisms providing trusted execution environments (TEEs), support for integrity checking and memory safety and widespread uses of hardware roots of trust. In this paper, we systematize these approaches through the lens of abstraction. Abstraction is key to computing systems, and the interface between hardware and software contains many abstractions. We find that these abstractions, when poorly designed, can both obscure information that is needed for security enforcement, as well as reveal information that needs to be kept secret, leading to vulnerabilities. We summarize such vulnerabilities and discuss several research trends of this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.04957v1-abstract-full').style.display = 'none'; document.getElementById('1910.04957v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.02551">arXiv:1908.02551</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1908.02551">pdf</a>, <a href="https://arxiv.org/ps/1908.02551">ps</a>, <a href="https://arxiv.org/format/1908.02551">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Tweets Can Tell: Activity Recognition using Hybrid Long Short-Term Memory Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Renhao Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+G">Gagan Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Ramnath%2C+R">Rajiv Ramnath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.02551v1-abstract-short" style="display: inline;"> This paper presents techniques to detect the &#34;offline&#34; activity a person is engaged in when she is tweeting (such as dining, shopping or entertainment), in order to create a dynamic profile of the user, for uses such as better targeting of advertisements. To this end, we propose a hybrid LSTM model for rich contextual learning, along with studies on the effects of applying and combining multiple L&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.02551v1-abstract-full').style.display = 'inline'; document.getElementById('1908.02551v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.02551v1-abstract-full" style="display: none;"> This paper presents techniques to detect the &#34;offline&#34; activity a person is engaged in when she is tweeting (such as dining, shopping or entertainment), in order to create a dynamic profile of the user, for uses such as better targeting of advertisements. To this end, we propose a hybrid LSTM model for rich contextual learning, along with studies on the effects of applying and combining multiple LSTM based methods with different contextual features. The hybrid model is shown to outperform a set of baselines and state-of-the-art methods. Finally, this paper presents an orthogonal validation with a real-case application. Our model generates an offline activity analysis for the followers of several well-known accounts, which is quite representative of the expected characteristics of these accounts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.02551v1-abstract-full').style.display = 'none'; document.getElementById('1908.02551v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.04610">arXiv:1806.04610</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1806.04610">pdf</a>, <a href="https://arxiv.org/ps/1806.04610">ps</a>, <a href="https://arxiv.org/format/1806.04610">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Novel Bayesian Approach for Latent Variable Modeling from Mixed Data with Missing Values </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Ruifei Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Bucur%2C+I+G">Ioan Gabriel Bucur</a>, <a href="/search/cs?searchtype=author&amp;query=Groot%2C+P">Perry Groot</a>, <a href="/search/cs?searchtype=author&amp;query=Heskes%2C+T">Tom Heskes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.04610v1-abstract-short" style="display: inline;"> We consider the problem of learning parameters of latent variable models from mixed (continuous and ordinal) data with missing values. We propose a novel Bayesian Gaussian copula factor (BGCF) approach that is consistent under certain conditions and that is quite robust to the violations of these conditions. In simulations, BGCF substantially outperforms two state-of-the-art alternative approaches&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04610v1-abstract-full').style.display = 'inline'; document.getElementById('1806.04610v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.04610v1-abstract-full" style="display: none;"> We consider the problem of learning parameters of latent variable models from mixed (continuous and ordinal) data with missing values. We propose a novel Bayesian Gaussian copula factor (BGCF) approach that is consistent under certain conditions and that is quite robust to the violations of these conditions. In simulations, BGCF substantially outperforms two state-of-the-art alternative approaches. An illustration on the `Holzinger &amp; Swineford 1939&#39; dataset indicates that BGCF is favorable over the so-called robust maximum likelihood (MLR) even if the data match the assumptions of MLR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04610v1-abstract-full').style.display = 'none'; document.getElementById('1806.04610v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1708.04106">arXiv:1708.04106</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1708.04106">pdf</a>, <a href="https://arxiv.org/format/1708.04106">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rocket Launching: A Universal and Efficient Framework for Training Well-performing Light Net </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+G">Guorui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+Y">Ying Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Runpeng Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+W">Weijie Bian</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+X">Xiaoqiang Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Gai%2C+K">Kun Gai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1708.04106v3-abstract-short" style="display: inline;"> Models applied on real time response task, like click-through rate (CTR) prediction model, require high accuracy and rigorous response time. Therefore, top-performing deep models of high depth and complexity are not well suited for these applications with the limitations on the inference time. In order to further improve the neural networks&#39; performance given the time and computational limitations&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.04106v3-abstract-full').style.display = 'inline'; document.getElementById('1708.04106v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1708.04106v3-abstract-full" style="display: none;"> Models applied on real time response task, like click-through rate (CTR) prediction model, require high accuracy and rigorous response time. Therefore, top-performing deep models of high depth and complexity are not well suited for these applications with the limitations on the inference time. In order to further improve the neural networks&#39; performance given the time and computational limitations, we propose an approach that exploits a cumbersome net to help train the lightweight net for prediction. We dub the whole process rocket launching, where the cumbersome booster net is used to guide the learning of the target light net throughout the whole training process. We analyze different loss functions aiming at pushing the light net to behave similarly to the booster net, and adopt the loss with best performance in our experiments. We use one technique called gradient block to improve the performance of the light net and booster net further. Experiments on benchmark datasets and real-life industrial advertisement data present that our light model can get performance only previously achievable with more complex models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.04106v3-abstract-full').style.display = 'none'; document.getElementById('1708.04106v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 August, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, AAAI2018</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1507.08075">arXiv:1507.08075</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1507.08075">pdf</a>, <a href="https://arxiv.org/ps/1507.08075">ps</a>, <a href="https://arxiv.org/format/1507.08075">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Low Bit-Rate and High Fidelity Reversible Data Hiding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+X">Xiaochao Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Suah Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Run Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H+J">Hyoung Joong Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1507.08075v1-abstract-short" style="display: inline;"> An accurate predictor is crucial for histogram-shifting (HS) based reversible data hiding methods. The embedding capacity is increased and the embedding distortion is decreased simultaneously if the predictor can generate accurate predictions. In this paper, we propose an accurate linear predictor based on weighted least squares (WLS) estimation. The robustness of WLS helps the proposed predictor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1507.08075v1-abstract-full').style.display = 'inline'; document.getElementById('1507.08075v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1507.08075v1-abstract-full" style="display: none;"> An accurate predictor is crucial for histogram-shifting (HS) based reversible data hiding methods. The embedding capacity is increased and the embedding distortion is decreased simultaneously if the predictor can generate accurate predictions. In this paper, we propose an accurate linear predictor based on weighted least squares (WLS) estimation. The robustness of WLS helps the proposed predictor generate accurate predictions, especially in complex texture areas of an image, where other predictors usually fail. To further reduce the embedding distortion, we propose a new embedding method called dynamic histogram shifting with pixel selection (DHS-PS) that selects not only the proper histogram bins but also the proper pixel locations to embed the given data. As a result, the proposed method can obtain very high fidelity marked images with low bit-rate data embedded. The experimental results show that the proposed method outperforms the state-of-the-art low bit-rate reversible data hiding method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1507.08075v1-abstract-full').style.display = 'none'; document.getElementById('1507.08075v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2015. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1507.08064">arXiv:1507.08064</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1507.08064">pdf</a>, <a href="https://arxiv.org/ps/1507.08064">ps</a>, <a href="https://arxiv.org/format/1507.08064">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Collaborative Representation Classification Ensemble for Face Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+X">Xiaochao Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Suah Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Run Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H+J">Hyoung Joong Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1507.08064v1-abstract-short" style="display: inline;"> Collaborative Representation Classification (CRC) for face recognition attracts a lot attention recently due to its good recognition performance and fast speed. Compared to Sparse Representation Classification (SRC), CRC achieves a comparable recognition performance with 10-1000 times faster speed. In this paper, we propose to ensemble several CRC models to promote the recognition rate, where each&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1507.08064v1-abstract-full').style.display = 'inline'; document.getElementById('1507.08064v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1507.08064v1-abstract-full" style="display: none;"> Collaborative Representation Classification (CRC) for face recognition attracts a lot attention recently due to its good recognition performance and fast speed. Compared to Sparse Representation Classification (SRC), CRC achieves a comparable recognition performance with 10-1000 times faster speed. In this paper, we propose to ensemble several CRC models to promote the recognition rate, where each CRC model uses different and divergent randomly generated biologically-inspired features as the face representation. The proposed ensemble algorithm calculates an ensemble weight for each CRC model that guided by the underlying classification rule of CRC. The obtained weights reflect the confidences of those CRC models where the more confident CRC models have larger weights. The proposed weighted ensemble method proves to be very effective and improves the performance of each CRC model significantly. Extensive experiments are conducted to show the superior performance of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1507.08064v1-abstract-full').style.display = 'none'; document.getElementById('1507.08064v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1506.06272">arXiv:1506.06272</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1506.06272">pdf</a>, <a href="https://arxiv.org/format/1506.06272">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Aligning where to see and what to tell: image caption with region-based attention and scene factorization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jin%2C+J">Junqi Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+K">Kun Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+R">Runpeng Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+F">Fei Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Changshui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1506.06272v1-abstract-short" style="display: inline;"> Recent progress on automatic generation of image captions has shown that it is possible to describe the most salient information conveyed by images with accurate and meaningful sentences. In this paper, we propose an image caption system that exploits the parallel structures between images and sentences. In our model, the process of generating the next word, given the previously generated ones, is&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1506.06272v1-abstract-full').style.display = 'inline'; document.getElementById('1506.06272v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1506.06272v1-abstract-full" style="display: none;"> Recent progress on automatic generation of image captions has shown that it is possible to describe the most salient information conveyed by images with accurate and meaningful sentences. In this paper, we propose an image caption system that exploits the parallel structures between images and sentences. In our model, the process of generating the next word, given the previously generated ones, is aligned with the visual perception experience where the attention shifting among the visual regions imposes a thread of visual ordering. This alignment characterizes the flow of &#34;abstract meaning&#34;, encoding what is semantically shared by both the visual scene and the text description. Our system also makes another novel modeling contribution by introducing scene-specific contexts that capture higher-level semantic information encoded in an image. The contexts adapt language models for word generation to specific scene types. We benchmark our system and contrast to published results on several popular datasets. We show that using either region-based attention or scene-specific contexts improves systems without those components. Furthermore, combining these two modeling ingredients attains the state-of-the-art performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1506.06272v1-abstract-full').style.display = 'none'; document.getElementById('1506.06272v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2015. </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10