Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–26 of 26 results for author: <span class="mathjax">Fei, B</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Fei%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Fei, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Fei%2C+B&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Fei, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05420">arXiv:2411.05420</a> <span> [<a href="https://arxiv.org/pdf/2411.05420">pdf</a>, <a href="https://arxiv.org/format/2411.05420">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> WeatherGFM: Learning A Weather Generalist Foundation Model via In-context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiangyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zhiwang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenlong Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yihao Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiangyu Chen</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Junchao Gong</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shiqi Chen</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiao-Ming Wu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Lei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05420v1-abstract-short" style="display: inline;"> The Earth's weather system encompasses intricate weather data modalities and diverse weather understanding tasks, which hold significant value to human life. Existing data-driven models focus on single weather understanding tasks (e.g., weather forecasting). Although these models have achieved promising results, they fail to tackle various complex tasks within a single and unified model. Moreover,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05420v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05420v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05420v1-abstract-full" style="display: none;"> The Earth's weather system encompasses intricate weather data modalities and diverse weather understanding tasks, which hold significant value to human life. Existing data-driven models focus on single weather understanding tasks (e.g., weather forecasting). Although these models have achieved promising results, they fail to tackle various complex tasks within a single and unified model. Moreover, the paradigm that relies on limited real observations for a single scenario hinders the model's performance upper bound. In response to these limitations, we draw inspiration from the in-context learning paradigm employed in state-of-the-art visual foundation models and large language models. In this paper, we introduce the first generalist weather foundation model (WeatherGFM), designed to address a wide spectrum of weather understanding tasks in a unified manner. More specifically, we initially unify the representation and definition of the diverse weather understanding tasks. Subsequently, we devised weather prompt formats to manage different weather data modalities, namely single, multiple, and temporal modalities. Finally, we adopt a visual prompting question-answering paradigm for the training of unified weather understanding tasks. Extensive experiments indicate that our WeatherGFM can effectively handle up to ten weather understanding tasks, including weather forecasting, super-resolution, weather image translation, and post-processing. Our method also showcases generalization ability on unseen tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05420v1-abstract-full').style.display = 'none'; document.getElementById('2411.05420v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15941">arXiv:2410.15941</a> <span> [<a href="https://arxiv.org/pdf/2410.15941">pdf</a>, <a href="https://arxiv.org/format/2410.15941">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MBPU: A Plug-and-Play State Space Model for Point Cloud Upsamping with Fast Point Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+J">Jiayi Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhijun Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wen-Ming Chen</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15941v1-abstract-short" style="display: inline;"> The task of point cloud upsampling (PCU) is to generate dense and uniform point clouds from sparse input captured by 3D sensors like LiDAR, holding potential applications in real yet is still a challenging task. Existing deep learning-based methods have shown significant achievements in this field. However, they still face limitations in effectively handling long sequences and addressing the issue… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15941v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15941v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15941v1-abstract-full" style="display: none;"> The task of point cloud upsampling (PCU) is to generate dense and uniform point clouds from sparse input captured by 3D sensors like LiDAR, holding potential applications in real yet is still a challenging task. Existing deep learning-based methods have shown significant achievements in this field. However, they still face limitations in effectively handling long sequences and addressing the issue of shrinkage artifacts around the surface of the point cloud. Inspired by the newly proposed Mamba, in this paper, we introduce a network named MBPU built on top of the Mamba architecture, which performs well in long sequence modeling, especially for large-scale point cloud upsampling, and achieves fast convergence speed. Moreover, MBPU is an arbitrary-scale upsampling framework as the predictor of point distance in the point refinement phase. At the same time, we simultaneously predict the 3D position shift and 1D point-to-point distance as regression quantities to constrain the global features while ensuring the accuracy of local details. We also introduce a fast differentiable renderer to further enhance the fidelity of the upsampled point cloud and reduce artifacts. It is noted that, by the merits of our fast point rendering, MBPU yields high-quality upsampled point clouds by effectively eliminating surface noise. Extensive experiments have demonstrated that our MBPU outperforms other off-the-shelf methods in terms of point cloud upsampling, especially for large-scale point clouds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15941v1-abstract-full').style.display = 'none'; document.getElementById('2410.15941v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14732">arXiv:2410.14732</a> <span> [<a href="https://arxiv.org/pdf/2410.14732">pdf</a>, <a href="https://arxiv.org/format/2410.14732">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> SIFM: A Foundation Model for Multi-granularity Arctic Sea Ice Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yeqi Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shengnan Wang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Lei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14732v1-abstract-short" style="display: inline;"> Arctic sea ice performs a vital role in global climate and has paramount impacts on both polar ecosystems and coastal communities. In the last few years, multiple deep learning based pan-Arctic sea ice concentration (SIC) forecasting methods have emerged and showcased superior performance over physics-based dynamical models. However, previous methods forecast SIC at a fixed temporal granularity, e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14732v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14732v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14732v1-abstract-full" style="display: none;"> Arctic sea ice performs a vital role in global climate and has paramount impacts on both polar ecosystems and coastal communities. In the last few years, multiple deep learning based pan-Arctic sea ice concentration (SIC) forecasting methods have emerged and showcased superior performance over physics-based dynamical models. However, previous methods forecast SIC at a fixed temporal granularity, e.g. sub-seasonal or seasonal, thus only leveraging inter-granularity information and overlooking the plentiful inter-granularity correlations. SIC at various temporal granularities exhibits cumulative effects and are naturally consistent, with short-term fluctuations potentially impacting long-term trends and long-term trends provides effective hints for facilitating short-term forecasts in Arctic sea ice. Therefore, in this study, we propose to cultivate temporal multi-granularity that naturally derived from Arctic sea ice reanalysis data and provide a unified perspective for modeling SIC via our Sea Ice Foundation Model. SIFM is delicately designed to leverage both intra-granularity and inter-granularity information for capturing granularity-consistent representations that promote forecasting skills. Our extensive experiments show that SIFM outperforms off-the-shelf deep learning models for their specific temporal granularity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14732v1-abstract-full').style.display = 'none'; document.getElementById('2410.14732v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09111">arXiv:2410.09111</a> <span> [<a href="https://arxiv.org/pdf/2410.09111">pdf</a>, <a href="https://arxiv.org/format/2410.09111">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> IceDiff: High Resolution and High-Quality Sea Ice Forecasting with Generative Diffusion Prior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+S">Siwei Tu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuhao Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yeqi Luo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lipeng Ma</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Lei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09111v1-abstract-short" style="display: inline;"> Variation of Arctic sea ice has significant impacts on polar ecosystems, transporting routes, coastal communities, and global climate. Tracing the change of sea ice at a finer scale is paramount for both operational applications and scientific studies. Recent pan-Arctic sea ice forecasting methods that leverage advances in artificial intelligence has made promising progress over numerical models.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09111v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09111v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09111v1-abstract-full" style="display: none;"> Variation of Arctic sea ice has significant impacts on polar ecosystems, transporting routes, coastal communities, and global climate. Tracing the change of sea ice at a finer scale is paramount for both operational applications and scientific studies. Recent pan-Arctic sea ice forecasting methods that leverage advances in artificial intelligence has made promising progress over numerical models. However, forecasting sea ice at higher resolutions is still under-explored. To bridge the gap, we propose a two-staged deep learning framework, IceDiff, to forecast sea ice concentration at finer scales. IceDiff first leverages an independently trained vision transformer to generate coarse yet superior forecasting over previous methods at a regular 25km x 25km grid. This high-quality sea ice forecasting can be utilized as reliable guidance for the next stage. Subsequently, an unconditional diffusion model pre-trained on sea ice concentration maps is utilized for sampling down-scaled sea ice forecasting via a zero-shot guided sampling strategy and a patch-based method. For the first time, IceDiff demonstrates sea ice forecasting with the 6.25km x 6.25km resolution. IceDiff extends the boundary of existing sea ice forecasting models and more importantly, its capability to generate high-resolution sea ice concentration data is vital for pragmatic usages and research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09111v1-abstract-full').style.display = 'none'; document.getElementById('2410.09111v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05805">arXiv:2410.05805</a> <span> [<a href="https://arxiv.org/pdf/2410.05805">pdf</a>, <a href="https://arxiv.org/format/2410.05805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PostCast: Generalizable Postprocessing for Precipitation Nowcasting via Unsupervised Blurriness Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gong%2C+J">Junchao Gong</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+S">Siwei Tu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kun Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenlong Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaokang Yang</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Lei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05805v1-abstract-short" style="display: inline;"> Precipitation nowcasting plays a pivotal role in socioeconomic sectors, especially in severe convective weather warnings. Although notable progress has been achieved by approaches mining the spatiotemporal correlations with deep learning, these methods still suffer severe blurriness as the lead time increases, which hampers accurate predictions for extreme precipitation. To alleviate blurriness, r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05805v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05805v1-abstract-full" style="display: none;"> Precipitation nowcasting plays a pivotal role in socioeconomic sectors, especially in severe convective weather warnings. Although notable progress has been achieved by approaches mining the spatiotemporal correlations with deep learning, these methods still suffer severe blurriness as the lead time increases, which hampers accurate predictions for extreme precipitation. To alleviate blurriness, researchers explore generative methods conditioned on blurry predictions. However, the pairs of blurry predictions and corresponding ground truth need to be generated in advance, making the training pipeline cumbersome and limiting the generality of generative models within blur modes that appear in training data. By rethinking the blurriness in precipitation nowcasting as a blur kernel acting on predictions, we propose an unsupervised postprocessing method to eliminate the blurriness without the requirement of training with the pairs of blurry predictions and corresponding ground truth. Specifically, we utilize blurry predictions to guide the generation process of a pre-trained unconditional denoising diffusion probabilistic model (DDPM) to obtain high-fidelity predictions with eliminated blurriness. A zero-shot blur kernel estimation mechanism and an auto-scale denoise guidance strategy are introduced to adapt the unconditional DDPM to any blurriness modes varying from datasets and lead times in precipitation nowcasting. Extensive experiments are conducted on 7 precipitation radar datasets, demonstrating the generality and superiority of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05805v1-abstract-full').style.display = 'none'; document.getElementById('2410.05805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04963">arXiv:2409.04963</a> <span> [<a href="https://arxiv.org/pdf/2409.04963">pdf</a>, <a href="https://arxiv.org/format/2409.04963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GS-PT: Exploiting 3D Gaussian Splatting for Comprehensive Point Cloud Understanding via Self-supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yeqi Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhijun Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wen-Ming Chen</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04963v1-abstract-short" style="display: inline;"> Self-supervised learning of point cloud aims to leverage unlabeled 3D data to learn meaningful representations without reliance on manual annotations. However, current approaches face challenges such as limited data diversity and inadequate augmentation for effective feature learning. To address these challenges, we propose GS-PT, which integrates 3D Gaussian Splatting (3DGS) into point cloud self… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04963v1-abstract-full').style.display = 'inline'; document.getElementById('2409.04963v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04963v1-abstract-full" style="display: none;"> Self-supervised learning of point cloud aims to leverage unlabeled 3D data to learn meaningful representations without reliance on manual annotations. However, current approaches face challenges such as limited data diversity and inadequate augmentation for effective feature learning. To address these challenges, we propose GS-PT, which integrates 3D Gaussian Splatting (3DGS) into point cloud self-supervised learning for the first time. Our pipeline utilizes transformers as the backbone for self-supervised pre-training and introduces novel contrastive learning tasks through 3DGS. Specifically, the transformers aim to reconstruct the masked point cloud. 3DGS utilizes multi-view rendered images as input to generate enhanced point cloud distributions and novel view images, facilitating data augmentation and cross-modal contrastive learning. Additionally, we incorporate features from depth maps. By optimizing these tasks collectively, our method enriches the tri-modal self-supervised learning process, enabling the model to leverage the correlation across 3D point clouds and 2D images from various modalities. We freeze the encoder after pre-training and test the model's performance on multiple downstream tasks. Experimental results indicate that GS-PT outperforms the off-the-shelf self-supervised learning methods on various downstream tasks including 3D object classification, real-world classifications, and few-shot learning and segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04963v1-abstract-full').style.display = 'none'; document.getElementById('2409.04963v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01909">arXiv:2409.01909</a> <span> [<a href="https://arxiv.org/pdf/2409.01909">pdf</a>, <a href="https://arxiv.org/format/2409.01909">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LUK: Empowering Log Understanding with Expert Knowledge from Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lipeng Ma</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+S">Sihang Jiang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mingjie Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuhao Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+B">Bo Xu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yanghua Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01909v1-abstract-short" style="display: inline;"> Logs play a critical role in providing essential information for system monitoring and troubleshooting. Recently, with the success of pre-trained language models (PLMs) and large language models (LLMs) in natural language processing (NLP), smaller PLMs (such as BERT) and LLMs (like ChatGPT) have become the current mainstream approaches for log analysis. While LLMs possess rich knowledge, their hig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01909v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01909v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01909v1-abstract-full" style="display: none;"> Logs play a critical role in providing essential information for system monitoring and troubleshooting. Recently, with the success of pre-trained language models (PLMs) and large language models (LLMs) in natural language processing (NLP), smaller PLMs (such as BERT) and LLMs (like ChatGPT) have become the current mainstream approaches for log analysis. While LLMs possess rich knowledge, their high computational costs and unstable performance make LLMs impractical for analyzing logs directly. In contrast, smaller PLMs can be fine-tuned for specific tasks even with limited computational resources, making them more practical. However, these smaller PLMs face challenges in understanding logs comprehensively due to their limited expert knowledge. To better utilize the knowledge embedded within LLMs for log understanding, this paper introduces a novel knowledge enhancement framework, called LUK, which acquires expert knowledge from LLMs to empower log understanding on a smaller PLM. Specifically, we design a multi-expert collaboration framework based on LLMs consisting of different roles to acquire expert knowledge. In addition, we propose two novel pre-training tasks to enhance the log pre-training with expert knowledge. LUK achieves state-of-the-art results on different log analysis tasks and extensive experiments demonstrate expert knowledge from LLMs can be utilized more effectively to understand logs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01909v1-abstract-full').style.display = 'none'; document.getElementById('2409.01909v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11287">arXiv:2408.11287</a> <span> [<a href="https://arxiv.org/pdf/2408.11287">pdf</a>, <a href="https://arxiv.org/format/2408.11287">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Taming Generative Diffusion Prior for Universal Blind Image Restoration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tu%2C+S">Siwei Tu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11287v2-abstract-short" style="display: inline;"> Diffusion models have been widely utilized for image restoration. However, previous blind image restoration methods still need to assume the type of degradation model while leaving the parameters to be optimized, limiting their real-world applications. Therefore, we aim to tame generative diffusion prior for universal blind image restoration dubbed BIR-D, which utilizes an optimizable convolutiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11287v2-abstract-full').style.display = 'inline'; document.getElementById('2408.11287v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11287v2-abstract-full" style="display: none;"> Diffusion models have been widely utilized for image restoration. However, previous blind image restoration methods still need to assume the type of degradation model while leaving the parameters to be optimized, limiting their real-world applications. Therefore, we aim to tame generative diffusion prior for universal blind image restoration dubbed BIR-D, which utilizes an optimizable convolutional kernel to simulate the degradation model and dynamically update the parameters of the kernel in the diffusion steps, enabling it to achieve blind image restoration results even in various complex situations. Besides, based on mathematical reasoning, we have provided an empirical formula for the chosen of adaptive guidance scale, eliminating the need for a grid search for the optimal parameter. Experimentally, Our BIR-D has demonstrated superior practicality and versatility than off-the-shelf unsupervised methods across various tasks both on real-world and synthetic datasets, qualitatively and quantitatively. BIR-D is able to fulfill multi-guidance blind image restoration. Moreover, BIR-D can also restore images that undergo multiple and complicated degradations, demonstrating the practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11287v2-abstract-full').style.display = 'none'; document.getElementById('2408.11287v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 12 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10236">arXiv:2406.10236</a> <span> [<a href="https://arxiv.org/pdf/2406.10236">pdf</a>, <a href="https://arxiv.org/format/2406.10236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Lightening Anything in Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yixuan Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Hengjun Gao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lipeng Ma</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yatian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Pinghong Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10236v1-abstract-short" style="display: inline;"> The development of medical imaging techniques has made a significant contribution to clinical decision-making. However, the existence of suboptimal imaging quality, as indicated by irregular illumination or imbalanced intensity, presents significant obstacles in automating disease screening, analysis, and diagnosis. Existing approaches for natural image enhancement are mostly trained with numerous… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10236v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10236v1-abstract-full" style="display: none;"> The development of medical imaging techniques has made a significant contribution to clinical decision-making. However, the existence of suboptimal imaging quality, as indicated by irregular illumination or imbalanced intensity, presents significant obstacles in automating disease screening, analysis, and diagnosis. Existing approaches for natural image enhancement are mostly trained with numerous paired images, presenting challenges in data collection and training costs, all while lacking the ability to generalize effectively. Here, we introduce a pioneering training-free Diffusion Model for Universal Medical Image Enhancement, named UniMIE. UniMIE demonstrates its unsupervised enhancement capabilities across various medical image modalities without the need for any fine-tuning. It accomplishes this by relying solely on a single pre-trained model from ImageNet. We conduct a comprehensive evaluation on 13 imaging modalities and over 15 medical types, demonstrating better qualities, robustness, and accuracy than other modality-specific and data-inefficient models. By delivering high-quality enhancement and corresponding accuracy downstream tasks across a wide range of tasks, UniMIE exhibits considerable potential to accelerate the advancement of diagnostic tools and customized treatment plans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10236v1-abstract-full').style.display = 'none'; document.getElementById('2406.10236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05852">arXiv:2406.05852</a> <span> [<a href="https://arxiv.org/pdf/2406.05852">pdf</a>, <a href="https://arxiv.org/format/2406.05852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> RefGaussian: Disentangling Reflections from 3D Gaussian Splatting for Realistic Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Tianyue Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05852v1-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3D-GS) has made a notable advancement in the field of neural rendering, 3D scene reconstruction, and novel view synthesis. Nevertheless, 3D-GS encounters the main challenge when it comes to accurately representing physical reflections, especially in the case of total reflection and semi-reflection that are commonly found in real-world scenes. This limitation causes reflectio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05852v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05852v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05852v1-abstract-full" style="display: none;"> 3D Gaussian Splatting (3D-GS) has made a notable advancement in the field of neural rendering, 3D scene reconstruction, and novel view synthesis. Nevertheless, 3D-GS encounters the main challenge when it comes to accurately representing physical reflections, especially in the case of total reflection and semi-reflection that are commonly found in real-world scenes. This limitation causes reflections to be mistakenly treated as independent elements with physical presence, leading to imprecise reconstructions. Herein, to tackle this challenge, we propose RefGaussian to disentangle reflections from 3D-GS for realistically modeling reflections. Specifically, we propose to split a scene into transmitted and reflected components and represent these components using two Spherical Harmonics (SH). Given that this decomposition is not fully determined, we employ local regularization techniques to ensure local smoothness for both the transmitted and reflected components, thereby achieving more plausible decomposition outcomes than 3D-GS. Experimental results demonstrate that our approach achieves superior novel view synthesis and accurate depth estimation outcomes. Furthermore, it enables the utilization of scene editing applications, ensuring both high-quality results and physical coherence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05852v1-abstract-full').style.display = 'none'; document.getElementById('2406.05852v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13619">arXiv:2404.13619</a> <span> [<a href="https://arxiv.org/pdf/2404.13619">pdf</a>, <a href="https://arxiv.org/format/2404.13619">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Towards Unified Representation of Multi-Modal Pre-training for 3D Understanding via Differentiable Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yixuan Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lipeng Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13619v1-abstract-short" style="display: inline;"> State-of-the-art 3D models, which excel in recognition tasks, typically depend on large-scale datasets and well-defined category sets. Recent advances in multi-modal pre-training have demonstrated potential in learning 3D representations by aligning features from 3D shapes with their 2D RGB or depth counterparts. However, these existing frameworks often rely solely on either RGB or depth images, l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13619v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13619v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13619v1-abstract-full" style="display: none;"> State-of-the-art 3D models, which excel in recognition tasks, typically depend on large-scale datasets and well-defined category sets. Recent advances in multi-modal pre-training have demonstrated potential in learning 3D representations by aligning features from 3D shapes with their 2D RGB or depth counterparts. However, these existing frameworks often rely solely on either RGB or depth images, limiting their effectiveness in harnessing a comprehensive range of multi-modal data for 3D applications. To tackle this challenge, we present DR-Point, a tri-modal pre-training framework that learns a unified representation of RGB images, depth images, and 3D point clouds by pre-training with object triplets garnered from each modality. To address the scarcity of such triplets, DR-Point employs differentiable rendering to obtain various depth images. This approach not only augments the supply of depth images but also enhances the accuracy of reconstructed point clouds, thereby promoting the representative learning of the Transformer backbone. Subsequently, using a limited number of synthetically generated triplets, DR-Point effectively learns a 3D representation space that aligns seamlessly with the RGB-Depth image space. Our extensive experiments demonstrate that DR-Point outperforms existing self-supervised learning methods in a wide range of downstream tasks, including 3D object classification, part segmentation, point cloud completion, semantic segmentation, and detection. Additionally, our ablation studies validate the effectiveness of DR-Point in enhancing point cloud understanding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13619v1-abstract-full').style.display = 'none'; document.getElementById('2404.13619v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07106">arXiv:2404.07106</a> <span> [<a href="https://arxiv.org/pdf/2404.07106">pdf</a>, <a href="https://arxiv.org/format/2404.07106">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> 3DMambaComplete: Exploring Structured State Space Model for Point Cloud Completion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yixuan Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07106v1-abstract-short" style="display: inline;"> Point cloud completion aims to generate a complete and high-fidelity point cloud from an initially incomplete and low-quality input. A prevalent strategy involves leveraging Transformer-based models to encode global features and facilitate the reconstruction process. However, the adoption of pooling operations to obtain global feature representations often results in the loss of local details with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07106v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07106v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07106v1-abstract-full" style="display: none;"> Point cloud completion aims to generate a complete and high-fidelity point cloud from an initially incomplete and low-quality input. A prevalent strategy involves leveraging Transformer-based models to encode global features and facilitate the reconstruction process. However, the adoption of pooling operations to obtain global feature representations often results in the loss of local details within the point cloud. Moreover, the attention mechanism inherent in Transformers introduces additional computational complexity, rendering it challenging to handle long sequences effectively. To address these issues, we propose 3DMambaComplete, a point cloud completion network built on the novel Mamba framework. It comprises three modules: HyperPoint Generation encodes point cloud features using Mamba's selection mechanism and predicts a set of Hyperpoints. A specific offset is estimated, and the down-sampled points become HyperPoints. The HyperPoint Spread module disperses these HyperPoints across different spatial locations to avoid concentration. Finally, a deformation method transforms the 2D mesh representation of HyperPoints into a fine-grained 3D structure for point cloud reconstruction. Extensive experiments conducted on various established benchmarks demonstrate that 3DMambaComplete surpasses state-of-the-art point cloud completion methods, as confirmed by qualitative and quantitative analyses. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07106v1-abstract-full').style.display = 'none'; document.getElementById('2404.07106v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05522">arXiv:2404.05522</a> <span> [<a href="https://arxiv.org/pdf/2404.05522">pdf</a>, <a href="https://arxiv.org/format/2404.05522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> 3DMambaIPF: A State Space Model for Iterative Point Cloud Filtering via Differentiable Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yeqi Luo</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05522v1-abstract-short" style="display: inline;"> Noise is an inevitable aspect of point cloud acquisition, necessitating filtering as a fundamental task within the realm of 3D vision. Existing learning-based filtering methods have shown promising capabilities on small-scale synthetic or real-world datasets. Nonetheless, the effectiveness of these methods is constrained when dealing with a substantial quantity of point clouds. This limitation pri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05522v1-abstract-full').style.display = 'inline'; document.getElementById('2404.05522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05522v1-abstract-full" style="display: none;"> Noise is an inevitable aspect of point cloud acquisition, necessitating filtering as a fundamental task within the realm of 3D vision. Existing learning-based filtering methods have shown promising capabilities on small-scale synthetic or real-world datasets. Nonetheless, the effectiveness of these methods is constrained when dealing with a substantial quantity of point clouds. This limitation primarily stems from their limited denoising capabilities for large-scale point clouds and their inclination to generate noisy outliers after denoising. The recent introduction of State Space Models (SSMs) for long sequence modeling in Natural Language Processing (NLP) presents a promising solution for handling large-scale data. Encouraged by iterative point cloud filtering methods, we introduce 3DMambaIPF, firstly incorporating Mamba (Selective SSM) architecture to sequentially handle extensive point clouds from large scenes, capitalizing on its strengths in selective input processing and long sequence modeling capabilities. Additionally, we integrate a robust and fast differentiable rendering loss to constrain the noisy points around the surface. In contrast to previous methodologies, this differentiable rendering loss enhances the visual realism of denoised geometric structures and aligns point cloud boundaries more closely with those observed in real-world objects. Extensive evaluation on datasets comprising small-scale synthetic and real-world models (typically with up to 50K points) demonstrate that our method achieves state-of-the-art results. Moreover, we showcase the superior scalability and efficiency of our method on large-scale models with about 500K points, where the majority of the existing learning-based denoising methods are unable to handle. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05522v1-abstract-full').style.display = 'none'; document.getElementById('2404.05522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11990">arXiv:2403.11990</a> <span> [<a href="https://arxiv.org/pdf/2403.11990">pdf</a>, <a href="https://arxiv.org/format/2403.11990">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GetMesh: A Controllable Model for High-quality Mesh Generation and Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+Z">Zhaoyang Lyu</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xudong Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ya Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+B">Bo Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11990v1-abstract-short" style="display: inline;"> Mesh is a fundamental representation of 3D assets in various industrial applications, and is widely supported by professional softwares. However, due to its irregular structure, mesh creation and manipulation is often time-consuming and labor-intensive. In this paper, we propose a highly controllable generative model, GetMesh, for mesh generation and manipulation across different categories. By ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11990v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11990v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11990v1-abstract-full" style="display: none;"> Mesh is a fundamental representation of 3D assets in various industrial applications, and is widely supported by professional softwares. However, due to its irregular structure, mesh creation and manipulation is often time-consuming and labor-intensive. In this paper, we propose a highly controllable generative model, GetMesh, for mesh generation and manipulation across different categories. By taking a varying number of points as the latent representation, and re-organizing them as triplane representation, GetMesh generates meshes with rich and sharp details, outperforming both single-category and multi-category counterparts. Moreover, it also enables fine-grained control over the generation process that previous mesh generative models cannot achieve, where changing global/local mesh topologies, adding/removing mesh parts, and combining mesh parts across categories can be intuitively, efficiently, and robustly accomplished by adjusting the number, positions or features of latent points. Project page is https://getmesh.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11990v1-abstract-full').style.display = 'none'; document.getElementById('2403.11990v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.10001">arXiv:2403.10001</a> <span> [<a href="https://arxiv.org/pdf/2403.10001">pdf</a>, <a href="https://arxiv.org/format/2403.10001">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Visual Foundation Models Boost Cross-Modal Unsupervised Domain Adaptation for 3D Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+L">Lingdong Kong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Youquan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.10001v1-abstract-short" style="display: inline;"> Unsupervised domain adaptation (UDA) is vital for alleviating the workload of labeling 3D point cloud data and mitigating the absence of labels when facing a newly defined domain. Various methods of utilizing images to enhance the performance of cross-domain 3D segmentation have recently emerged. However, the pseudo labels, which are generated from models trained on the source domain and provide a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10001v1-abstract-full').style.display = 'inline'; document.getElementById('2403.10001v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.10001v1-abstract-full" style="display: none;"> Unsupervised domain adaptation (UDA) is vital for alleviating the workload of labeling 3D point cloud data and mitigating the absence of labels when facing a newly defined domain. Various methods of utilizing images to enhance the performance of cross-domain 3D segmentation have recently emerged. However, the pseudo labels, which are generated from models trained on the source domain and provide additional supervised signals for the unseen domain, are inadequate when utilized for 3D segmentation due to their inherent noisiness and consequently restrict the accuracy of neural networks. With the advent of 2D visual foundation models (VFMs) and their abundant knowledge prior, we propose a novel pipeline VFMSeg to further enhance the cross-modal unsupervised domain adaptation framework by leveraging these models. In this work, we study how to harness the knowledge priors learned by VFMs to produce more accurate labels for unlabeled target domains and improve overall performance. We first utilize a multi-modal VFM, which is pre-trained on large scale image-text pairs, to provide supervised labels (VFM-PL) for images and point clouds from the target domain. Then, another VFM trained on fine-grained 2D masks is adopted to guide the generation of semantically augmented images and point clouds to enhance the performance of neural networks, which mix the data from source and target domains like view frustums (FrustumMixing). Finally, we merge class-wise prediction across modalities to produce more accurate annotations for unlabeled target domains. Our method is evaluated on various autonomous driving datasets and the results demonstrate a significant improvement for 3D segmentation task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10001v1-abstract-full').style.display = 'none'; document.getElementById('2403.10001v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.07181">arXiv:2402.07181</a> <span> [<a href="https://arxiv.org/pdf/2402.07181">pdf</a>, <a href="https://arxiv.org/format/2402.07181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVCG.2024.3397828">10.1109/TVCG.2024.3397828 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 3D Gaussian as a New Era: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jingyi Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.07181v2-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3D-GS) has emerged as a significant advancement in the field of Computer Graphics, offering explicit scene representation and novel view synthesis without the reliance on neural networks, such as Neural Radiance Fields (NeRF). This technique has found diverse applications in areas such as robotics, urban mapping, autonomous navigation, and virtual reality/augmented reality,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.07181v2-abstract-full').style.display = 'inline'; document.getElementById('2402.07181v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.07181v2-abstract-full" style="display: none;"> 3D Gaussian Splatting (3D-GS) has emerged as a significant advancement in the field of Computer Graphics, offering explicit scene representation and novel view synthesis without the reliance on neural networks, such as Neural Radiance Fields (NeRF). This technique has found diverse applications in areas such as robotics, urban mapping, autonomous navigation, and virtual reality/augmented reality, just name a few. Given the growing popularity and expanding research in 3D Gaussian Splatting, this paper presents a comprehensive survey of relevant papers from the past year. We organize the survey into taxonomies based on characteristics and applications, providing an introduction to the theoretical underpinnings of 3D Gaussian Splatting. Our goal through this survey is to acquaint new researchers with 3D Gaussian Splatting, serve as a valuable reference for seminal works in the field, and inspire future research directions, as discussed in our concluding section. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.07181v2-abstract-full').style.display = 'none'; document.getElementById('2402.07181v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE TVCG 2024, Please refer to: https://ieeexplore.ieee.org/document/10521791</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.17603">arXiv:2401.17603</a> <span> [<a href="https://arxiv.org/pdf/2401.17603">pdf</a>, <a href="https://arxiv.org/format/2401.17603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Topology-Aware Latent Diffusion for 3D Shape Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jiangbei Hu</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+B">Baixin Xu</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+F">Fei Hou</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shengfa Wang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+N">Na Lei</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+C">Chen Qian</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.17603v1-abstract-short" style="display: inline;"> We introduce a new generative model that combines latent diffusion with persistent homology to create 3D shapes with high diversity, with a special emphasis on their topological characteristics. Our method involves representing 3D shapes as implicit fields, then employing persistent homology to extract topological features, including Betti numbers and persistence diagrams. The shape generation pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17603v1-abstract-full').style.display = 'inline'; document.getElementById('2401.17603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.17603v1-abstract-full" style="display: none;"> We introduce a new generative model that combines latent diffusion with persistent homology to create 3D shapes with high diversity, with a special emphasis on their topological characteristics. Our method involves representing 3D shapes as implicit fields, then employing persistent homology to extract topological features, including Betti numbers and persistence diagrams. The shape generation process consists of two steps. Initially, we employ a transformer-based autoencoding module to embed the implicit representation of each 3D shape into a set of latent vectors. Subsequently, we navigate through the learned latent space via a diffusion model. By strategically incorporating topological features into the diffusion process, our generative module is able to produce a richer variety of 3D shapes with different topological structures. Furthermore, our framework is flexible, supporting generation tasks constrained by a variety of inputs, including sparse and partial point clouds, as well as sketches. By modifying the persistence diagrams, we can alter the topology of the shapes generated from these input modalities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17603v1-abstract-full').style.display = 'none'; document.getElementById('2401.17603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.3.5; I.2.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.04691">arXiv:2305.04691</a> <span> [<a href="https://arxiv.org/pdf/2305.04691">pdf</a>, <a href="https://arxiv.org/format/2305.04691">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Self-supervised Learning for Pre-Training 3D Point Clouds: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liwen Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Tianyue Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yixuan Li</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Ying He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.04691v1-abstract-short" style="display: inline;"> Point cloud data has been extensively studied due to its compact form and flexibility in representing complex 3D structures. The ability of point cloud data to accurately capture and represent intricate 3D geometry makes it an ideal choice for a wide range of applications, including computer vision, robotics, and autonomous driving, all of which require an understanding of the underlying spatial s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04691v1-abstract-full').style.display = 'inline'; document.getElementById('2305.04691v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.04691v1-abstract-full" style="display: none;"> Point cloud data has been extensively studied due to its compact form and flexibility in representing complex 3D structures. The ability of point cloud data to accurately capture and represent intricate 3D geometry makes it an ideal choice for a wide range of applications, including computer vision, robotics, and autonomous driving, all of which require an understanding of the underlying spatial structures. Given the challenges associated with annotating large-scale point clouds, self-supervised point cloud representation learning has attracted increasing attention in recent years. This approach aims to learn generic and useful point cloud representations from unlabeled data, circumventing the need for extensive manual annotations. In this paper, we present a comprehensive survey of self-supervised point cloud representation learning using DNNs. We begin by presenting the motivation and general trends in recent research. We then briefly introduce the commonly used datasets and evaluation metrics. Following that, we delve into an extensive exploration of self-supervised point cloud representation learning methods based on these techniques. Finally, we share our thoughts on some of the challenges and potential issues that future research in self-supervised learning for pre-training 3D point clouds may encounter. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04691v1-abstract-full').style.display = 'none'; document.getElementById('2305.04691v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 12 figures, 14 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.01247">arXiv:2304.01247</a> <span> [<a href="https://arxiv.org/pdf/2304.01247">pdf</a>, <a href="https://arxiv.org/format/2304.01247">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Generative Diffusion Prior for Unified Image Restoration and Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+Z">Zhaoyang Lyu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+L">Liang Pan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junzhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Tianyue Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bo Zhang</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+B">Bo Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.01247v1-abstract-short" style="display: inline;"> Existing image restoration methods mostly leverage the posterior distribution of natural images. However, they often assume known degradation and also require supervised training, which restricts their adaptation to complex real applications. In this work, we propose the Generative Diffusion Prior (GDP) to effectively model the posterior distributions in an unsupervised sampling manner. GDP utiliz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01247v1-abstract-full').style.display = 'inline'; document.getElementById('2304.01247v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.01247v1-abstract-full" style="display: none;"> Existing image restoration methods mostly leverage the posterior distribution of natural images. However, they often assume known degradation and also require supervised training, which restricts their adaptation to complex real applications. In this work, we propose the Generative Diffusion Prior (GDP) to effectively model the posterior distributions in an unsupervised sampling manner. GDP utilizes a pre-train denoising diffusion generative model (DDPM) for solving linear inverse, non-linear, or blind problems. Specifically, GDP systematically explores a protocol of conditional guidance, which is verified more practical than the commonly used guidance way. Furthermore, GDP is strength at optimizing the parameters of degradation model during the denoising process, achieving blind image restoration. Besides, we devise hierarchical guidance and patch-based methods, enabling the GDP to generate images of arbitrary resolutions. Experimentally, we demonstrate GDP's versatility on several image datasets for linear problems, such as super-resolution, deblurring, inpainting, and colorization, as well as non-linear and blind issues, such as low-light enhancement and HDR image recovery. GDP outperforms the current leading unsupervised methods on the diverse benchmarks in reconstruction quality and perceptual quality. Moreover, GDP also generalizes well for natural images or synthesized images with arbitrary sizes from various tasks out of the distribution of the ImageNet training set. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01247v1-abstract-full').style.display = 'none'; document.getElementById('2304.01247v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">46 pages, 38 figures, accepted by CVPR2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10390">arXiv:2212.10390</a> <span> [<a href="https://arxiv.org/pdf/2212.10390">pdf</a>, <a href="https://arxiv.org/format/2212.10390">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> UniDA3D: Unified Domain Adaptive 3D Semantic Segmentation Pipeline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Siyuan Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jiakang Yuan</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+B">Botian Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bo Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+M">Min Dou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yikang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10390v4-abstract-short" style="display: inline;"> State-of-the-art 3D semantic segmentation models are trained on off-the-shelf public benchmarks, but they will inevitably face the challenge of recognition accuracy drop when these well-trained models are deployed to a new domain. In this paper, we introduce a Unified Domain Adaptive 3D semantic segmentation pipeline (UniDA3D) to enhance the weak generalization ability, and bridge the point distri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10390v4-abstract-full').style.display = 'inline'; document.getElementById('2212.10390v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10390v4-abstract-full" style="display: none;"> State-of-the-art 3D semantic segmentation models are trained on off-the-shelf public benchmarks, but they will inevitably face the challenge of recognition accuracy drop when these well-trained models are deployed to a new domain. In this paper, we introduce a Unified Domain Adaptive 3D semantic segmentation pipeline (UniDA3D) to enhance the weak generalization ability, and bridge the point distribution gap between domains. Different from previous studies that only focus on a single adaptation task, UniDA3D can tackle several adaptation tasks in 3D segmentation field, by designing a unified source-and-target active sampling strategy, which selects a maximally-informative subset from both source and target domains for effective model adaptation. Besides, benefiting from the rise of multi-modal 2D-3D datasets, UniDA3D investigates the possibility of achieving a multi-modal sampling strategy, by developing a cross-modality feature interaction module that can extract a representative pair of image and point features to achieve a bi-directional image-point feature interaction for safe model adaptation. Experimentally, UniDA3D is verified to be effective in many adaptation tasks including: 1) unsupervised domain adaptation, 2) unsupervised few-shot domain adaptation; 3) active domain adaptation. Their results demonstrate that, by easily coupling UniDA3D with off-the-shelf 3D segmentation baselines, domain generalization ability of these baselines can be enhanced. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10390v4-abstract-full').style.display = 'none'; document.getElementById('2212.10390v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.03311">arXiv:2203.03311</a> <span> [<a href="https://arxiv.org/pdf/2203.03311">pdf</a>, <a href="https://arxiv.org/format/2203.03311">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TITS.2022.3195555">10.1109/TITS.2022.3195555 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Comprehensive Review of Deep Learning-Based 3D Point Cloud Completion Processing and Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fei%2C+B">Ben Fei</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Weidong Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenming Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhijun Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yikang Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tao Ma</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xing Hu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lipeng Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.03311v3-abstract-short" style="display: inline;"> Point cloud completion is a generation and estimation issue derived from the partial point clouds, which plays a vital role in the applications in 3D computer vision. The progress of deep learning (DL) has impressively improved the capability and robustness of point cloud completion. However, the quality of completed point clouds is still needed to be further enhanced to meet the practical utiliza… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03311v3-abstract-full').style.display = 'inline'; document.getElementById('2203.03311v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.03311v3-abstract-full" style="display: none;"> Point cloud completion is a generation and estimation issue derived from the partial point clouds, which plays a vital role in the applications in 3D computer vision. The progress of deep learning (DL) has impressively improved the capability and robustness of point cloud completion. However, the quality of completed point clouds is still needed to be further enhanced to meet the practical utilization. Therefore, this work aims to conduct a comprehensive survey on various methods, including point-based, convolution-based, graph-based, and generative model-based approaches, etc. And this survey summarizes the comparisons among these methods to provoke further research insights. Besides, this review sums up the commonly used datasets and illustrates the applications of point cloud completion. Eventually, we also discussed possible research trends in this promptly expanding field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03311v3-abstract-full').style.display = 'none'; document.getElementById('2203.03311v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Intelligent Transportation Systems 23 (2022) 22862-22883 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.10074">arXiv:2112.10074</a> <span> [<a href="https://arxiv.org/pdf/2112.10074">pdf</a>, <a href="https://arxiv.org/format/2112.10074">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.59275/j.melba.2022-354b">10.59275/j.melba.2022-354b <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> QU-BraTS: MICCAI BraTS 2020 Challenge on Quantifying Uncertainty in Brain Tumor Segmentation - Analysis of Ranking Scores and Benchmarking Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mehta%2C+R">Raghav Mehta</a>, <a href="/search/cs?searchtype=author&query=Filos%2C+A">Angelos Filos</a>, <a href="/search/cs?searchtype=author&query=Baid%2C+U">Ujjwal Baid</a>, <a href="/search/cs?searchtype=author&query=Sako%2C+C">Chiharu Sako</a>, <a href="/search/cs?searchtype=author&query=McKinley%2C+R">Richard McKinley</a>, <a href="/search/cs?searchtype=author&query=Rebsamen%2C+M">Michael Rebsamen</a>, <a href="/search/cs?searchtype=author&query=Datwyler%2C+K">Katrin Datwyler</a>, <a href="/search/cs?searchtype=author&query=Meier%2C+R">Raphael Meier</a>, <a href="/search/cs?searchtype=author&query=Radojewski%2C+P">Piotr Radojewski</a>, <a href="/search/cs?searchtype=author&query=Murugesan%2C+G+K">Gowtham Krishnan Murugesan</a>, <a href="/search/cs?searchtype=author&query=Nalawade%2C+S">Sahil Nalawade</a>, <a href="/search/cs?searchtype=author&query=Ganesh%2C+C">Chandan Ganesh</a>, <a href="/search/cs?searchtype=author&query=Wagner%2C+B">Ben Wagner</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+F+F">Fang F. Yu</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Baowei Fei</a>, <a href="/search/cs?searchtype=author&query=Madhuranthakam%2C+A+J">Ananth J. Madhuranthakam</a>, <a href="/search/cs?searchtype=author&query=Maldjian%2C+J+A">Joseph A. Maldjian</a>, <a href="/search/cs?searchtype=author&query=Daza%2C+L">Laura Daza</a>, <a href="/search/cs?searchtype=author&query=Gomez%2C+C">Catalina Gomez</a>, <a href="/search/cs?searchtype=author&query=Arbelaez%2C+P">Pablo Arbelaez</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+C">Chengliang Dai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+Y">Yuan-han Mo</a>, <a href="/search/cs?searchtype=author&query=Angelini%2C+E">Elsa Angelini</a> , et al. (67 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.10074v2-abstract-short" style="display: inline;"> Deep learning (DL) models have provided state-of-the-art performance in various medical imaging benchmarking challenges, including the Brain Tumor Segmentation (BraTS) challenges. However, the task of focal pathology multi-compartment segmentation (e.g., tumor and lesion sub-regions) is particularly challenging, and potential errors hinder translating DL models into clinical workflows. Quantifying… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.10074v2-abstract-full').style.display = 'inline'; document.getElementById('2112.10074v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.10074v2-abstract-full" style="display: none;"> Deep learning (DL) models have provided state-of-the-art performance in various medical imaging benchmarking challenges, including the Brain Tumor Segmentation (BraTS) challenges. However, the task of focal pathology multi-compartment segmentation (e.g., tumor and lesion sub-regions) is particularly challenging, and potential errors hinder translating DL models into clinical workflows. Quantifying the reliability of DL model predictions in the form of uncertainties could enable clinical review of the most uncertain regions, thereby building trust and paving the way toward clinical translation. Several uncertainty estimation methods have recently been introduced for DL medical image segmentation tasks. Developing scores to evaluate and compare the performance of uncertainty measures will assist the end-user in making more informed decisions. In this study, we explore and evaluate a score developed during the BraTS 2019 and BraTS 2020 task on uncertainty quantification (QU-BraTS) and designed to assess and rank uncertainty estimates for brain tumor multi-compartment segmentation. This score (1) rewards uncertainty estimates that produce high confidence in correct assertions and those that assign low confidence levels at incorrect assertions, and (2) penalizes uncertainty measures that lead to a higher percentage of under-confident correct assertions. We further benchmark the segmentation uncertainties generated by 14 independent participating teams of QU-BraTS 2020, all of which also participated in the main BraTS segmentation task. Overall, our findings confirm the importance and complementary value that uncertainty estimates provide to segmentation algorithms, highlighting the need for uncertainty quantification in medical image analyses. Finally, in favor of transparency and reproducibility, our evaluation code is made publicly available at: https://github.com/RagMeh11/QU-BraTS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.10074v2-abstract-full').style.display = 'none'; document.getElementById('2112.10074v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at the Journal of Machine Learning for Biomedical Imaging (MELBA): https://www.melba-journal.org/papers/2022:026.html</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Machine.Learning.for.Biomedical.Imaging. 1 (2022) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.09693">arXiv:2110.09693</a> <span> [<a href="https://arxiv.org/pdf/2110.09693">pdf</a>, <a href="https://arxiv.org/format/2110.09693">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cross-Vendor CT Image Data Harmonization Using CVH-CT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Selim%2C+M">Md Selim</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Baowei Fei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guo-Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+G+Y">Gary Yeeming Ge</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.09693v1-abstract-short" style="display: inline;"> While remarkable advances have been made in Computed Tomography (CT), most of the existing efforts focus on imaging enhancement while reducing radiation dose. How to harmonize CT image data captured using different scanners is vital in cross-center large-scale radiomics studies but remains the boundary to explore. Furthermore, the lack of paired training image problem makes it computationally chal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09693v1-abstract-full').style.display = 'inline'; document.getElementById('2110.09693v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.09693v1-abstract-full" style="display: none;"> While remarkable advances have been made in Computed Tomography (CT), most of the existing efforts focus on imaging enhancement while reducing radiation dose. How to harmonize CT image data captured using different scanners is vital in cross-center large-scale radiomics studies but remains the boundary to explore. Furthermore, the lack of paired training image problem makes it computationally challenging to adopt existing deep learning models. %developed for CT image standardization. %this problem more challenging. We propose a novel deep learning approach called CVH-CT for harmonizing CT images captured using scanners from different vendors. The generator of CVH-CT uses a self-attention mechanism to learn the scanner-related information. We also propose a VGG feature-based domain loss to effectively extract texture properties from unpaired image data to learn the scanner-based texture distributions. The experimental results show that CVH-CT is clearly better than the baselines because of the use of the proposed domain loss, and CVH-CT can effectively reduce the scanner-related variability in terms of radiomic features. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09693v1-abstract-full').style.display = 'none'; document.getElementById('2110.09693v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.01337">arXiv:2107.01337</a> <span> [<a href="https://arxiv.org/pdf/2107.01337">pdf</a>, <a href="https://arxiv.org/format/2107.01337">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CT Image Harmonization for Enhancing Radiomics Studies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Selim%2C+M">Md Selim</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Baowei Fei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guo-Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.01337v1-abstract-short" style="display: inline;"> While remarkable advances have been made in Computed Tomography (CT), capturing CT images with non-standardized protocols causes low reproducibility regarding radiomic features, forming a barrier on CT image analysis in a large scale. RadiomicGAN is developed to effectively mitigate the discrepancy caused by using non-standard reconstruction kernels. RadiomicGAN consists of hybrid neural blocks in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.01337v1-abstract-full').style.display = 'inline'; document.getElementById('2107.01337v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.01337v1-abstract-full" style="display: none;"> While remarkable advances have been made in Computed Tomography (CT), capturing CT images with non-standardized protocols causes low reproducibility regarding radiomic features, forming a barrier on CT image analysis in a large scale. RadiomicGAN is developed to effectively mitigate the discrepancy caused by using non-standard reconstruction kernels. RadiomicGAN consists of hybrid neural blocks including both pre-trained and trainable layers adopted to learn radiomic feature distributions efficiently. A novel training approach, called Dynamic Window-based Training, has been developed to smoothly transform the pre-trained model to the medical imaging domain. Model performance evaluated using 1401 radiomic features show that RadiomicGAN clearly outperforms the state-of-art image standardization models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.01337v1-abstract-full').style.display = 'none'; document.getElementById('2107.01337v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.05229">arXiv:2102.05229</a> <span> [<a href="https://arxiv.org/pdf/2102.05229">pdf</a>, <a href="https://arxiv.org/ps/2102.05229">ps</a>, <a href="https://arxiv.org/format/2102.05229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neunet.2020.05.005">10.1016/j.neunet.2020.05.005 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Sequential vessel segmentation via deep channel attention network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hao%2C+D">Dongdong Hao</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+S">Song Ding</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+L">Linwei Qiu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+Y">Yisong Lv</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Baowei Fei</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yueqi Zhu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+B">Binjie Qin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.05229v1-abstract-short" style="display: inline;"> This paper develops a novel encoder-decoder deep network architecture which exploits the several contextual frames of 2D+t sequential images in a sliding window centered at current frame to segment 2D vessel masks from the current frame. The architecture is equipped with temporal-spatial feature extraction in encoder stage, feature fusion in skip connection layers and channel attention mechanism i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.05229v1-abstract-full').style.display = 'inline'; document.getElementById('2102.05229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.05229v1-abstract-full" style="display: none;"> This paper develops a novel encoder-decoder deep network architecture which exploits the several contextual frames of 2D+t sequential images in a sliding window centered at current frame to segment 2D vessel masks from the current frame. The architecture is equipped with temporal-spatial feature extraction in encoder stage, feature fusion in skip connection layers and channel attention mechanism in decoder stage. In the encoder stage, a series of 3D convolutional layers are employed to hierarchically extract temporal-spatial features. Skip connection layers subsequently fuse the temporal-spatial feature maps and deliver them to the corresponding decoder stages. To efficiently discriminate vessel features from the complex and noisy backgrounds in the XCA images, the decoder stage effectively utilizes channel attention blocks to refine the intermediate feature maps from skip connection layers for subsequently decoding the refined features in 2D ways to produce the segmented vessel masks. Furthermore, Dice loss function is implemented to train the proposed deep network in order to tackle the class imbalance problem in the XCA data due to the wide distribution of complex background artifacts. Extensive experiments by comparing our method with other state-of-the-art algorithms demonstrate the proposed method's superior performance over other methods in terms of the quantitative metrics and visual validation. The source codes are at https://github.com/Binjie-Qin/SVS-net <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.05229v1-abstract-full').style.display = 'none'; document.getElementById('2102.05229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Networks, 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.01307">arXiv:2004.01307</a> <span> [<a href="https://arxiv.org/pdf/2004.01307">pdf</a>, <a href="https://arxiv.org/format/2004.01307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> STAN-CT: Standardizing CT Image using Generative Adversarial Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Selim%2C+M">Md Selim</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+B">Baowei Fei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guo-Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.01307v1-abstract-short" style="display: inline;"> Computed tomography (CT) plays an important role in lung malignancy diagnostics and therapy assessment and facilitating precision medicine delivery. However, the use of personalized imaging protocols poses a challenge in large-scale cross-center CT image radiomic studies. We present an end-to-end solution called STAN-CT for CT image standardization and normalization, which effectively reduces disc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01307v1-abstract-full').style.display = 'inline'; document.getElementById('2004.01307v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.01307v1-abstract-full" style="display: none;"> Computed tomography (CT) plays an important role in lung malignancy diagnostics and therapy assessment and facilitating precision medicine delivery. However, the use of personalized imaging protocols poses a challenge in large-scale cross-center CT image radiomic studies. We present an end-to-end solution called STAN-CT for CT image standardization and normalization, which effectively reduces discrepancies in image features caused by using different imaging protocols or using different CT scanners with the same imaging protocol. STAN-CT consists of two components: 1) a novel Generative Adversarial Networks (GAN) model that is capable of effectively learning the data distribution of a standard imaging protocol with only a few rounds of generator training, and 2) an automatic DICOM reconstruction pipeline with systematic image quality control that ensure the generation of high-quality standard DICOM images. Experimental results indicate that the training efficiency and model performance of STAN-CT have been significantly improved compared to the state-of-the-art CT image standardization and normalization algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01307v1-abstract-full').style.display = 'none'; document.getElementById('2004.01307v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository