Graphics

Showing new listings for Thursday, 20 March 2025

Total of 10 entries

New submissions (showing 6 of 6 entries)

[1] arXiv:2503.14756 [pdf, other]
Title: SceneEval: Evaluating Semantic Coherence in Text-Conditioned 3D Indoor Scene Synthesis
Authors: Hou In Ivan Tam, Hou In Derek Pun, Austin T. Wang, Angel X. Chang, Manolis Savva
Comments: 20 pages, 6 figures, 6 tables
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Despite recent advances in text-conditioned 3D indoor scene generation, there remain gaps in the evaluation of these methods. Existing metrics primarily assess the realism of generated scenes by comparing them to a set of ground-truth scenes, often overlooking alignment with the input text, a critical factor in determining how effectively a method meets user requirements. We present SceneEval, an evaluation framework designed to address this limitation. SceneEval includes metrics for both explicit user requirements, such as the presence of specific objects and their attributes described in the input text, and implicit expectations, like the absence of object collisions, providing a comprehensive assessment of scene quality. To facilitate evaluation, we introduce SceneEval-100, a dataset of scene descriptions with annotated ground-truth scene properties. We evaluate recent scene generation methods using SceneEval and demonstrate its ability to provide detailed assessments of the generated scenes, highlighting strengths and areas for improvement across multiple dimensions. Our results show that current methods struggle to generate scenes that meet user requirements, underscoring the need for further research in this direction.
[2] arXiv:2503.14845 [pdf, html, other]
Title: ClimateGS: Real-Time Climate Simulation with 3D Gaussian Style Transfer
Authors: Yuezhen Xie, Meiying Zhang, Qi Hao
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Adverse climate conditions pose significant challenges for autonomous systems, demanding reliable perception and decision-making across diverse environments. To better simulate these conditions, physically based NeRF rendering methods have been explored for their ability to generate realistic scene representations. However, these methods suffer from slow rendering speeds and long preprocessing times, making them impractical for real-time testing and user interaction. This paper presents ClimateGS, a novel framework integrating 3D Gaussian representations with physical simulation to enable real-time climate effects rendering. The novelty of this work is threefold: 1) a linear transformation for 3D Gaussian photorealistic style transfer, enabling direct modification of spherical harmonics across bands for efficient and consistent style adaptation; 2) a joint training strategy for 3D style transfer, combining supervised and self-supervised learning to accelerate convergence while preserving original scene details; 3) a real-time rendering method for climate simulation, integrating physics-based effects with 3D Gaussians to achieve efficient and realistic rendering. We evaluate ClimateGS on MipNeRF360 and Tanks and Temples, demonstrating real-time rendering with comparable or superior visual quality to SOTA 2D/3D methods, making it suitable for interactive applications.
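One plausible reading of contribution 1) is a single color transform applied uniformly across every spherical-harmonic band of each Gaussian, with an offset on the DC term only; a minimal sketch under that assumption (the coefficient layout, matrix, and numbers are illustrative, not the paper's formulation):

    import numpy as np

    def restyle_sh(sh, A, b):
        """Apply a linear color transform to 3D Gaussian SH coefficients.

        sh: (N, K, 3) array -- N Gaussians, K SH basis functions, RGB.
        A:  (3, 3) color matrix derived from the target style.
        b:  (3,) offset, applied only to the DC (band-0) term so that
            higher bands keep their zero mean.
        """
        out = sh @ A.T          # same linear map on every band
        out[:, 0, :] += b       # shift only the constant term
        return out

    # toy usage: desaturate toward a grayish "overcast" look
    sh = np.random.randn(1000, 16, 3)     # degree-3 SH, 16 coefficients
    A = 0.7 * np.eye(3) + 0.1             # mix channels toward gray
    restyled = restyle_sh(sh, A, np.array([0.05, 0.05, 0.08]))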
[3] arXiv:2503.14908 [pdf, html, other]
Title: POSTA: A Go-to Framework for Customized Artistic Poster Generation
Authors: Haoyu Chen, Xiaojie Xu, Wenbo Li, Jingjing Ren, Tian Ye, Songhua Liu, Ying-Cong Chen, Lei Zhu, Xinchao Wang
Comments: Accepted to CVPR 2025
Subjects: Graphics (cs.GR); Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV)

Poster design is a critical medium for visual communication. Prior work has explored automatic poster design using deep learning techniques, but these approaches lack text accuracy, user customization, and aesthetic appeal, limiting their applicability in artistic domains such as movies and exhibitions, where both clear content delivery and visual impact are essential. To address these limitations, we present POSTA: a modular framework powered by diffusion models and multimodal large language models (MLLMs) for customized artistic poster generation. The framework consists of three modules. Background Diffusion creates a themed background based on user input. Design MLLM then generates layout and typography elements that align with and complement the background style. Finally, to enhance the poster's aesthetic appeal, ArtText Diffusion applies additional stylization to key text elements. The final result is a visually cohesive and appealing poster, with a fully modular process that allows for complete customization. To train our models, we develop the PosterArt dataset, comprising high-quality artistic posters annotated with layout, typography, and pixel-level stylized text segmentation. Our comprehensive experimental analysis demonstrates POSTA's exceptional controllability and design diversity, outperforming existing models in both text accuracy and aesthetic quality.
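The three modules chain sequentially, each conditioning on the previous output; a schematic sketch of that data flow, with stand-in callables in place of the actual models (none of these names come from the POSTA release):

    from typing import Callable

    def generate_poster(prompt: str,
                        background_diffusion: Callable[[str], str],
                        design_mllm: Callable[[str, str], str],
                        arttext_diffusion: Callable[[str, str], str]) -> dict:
        # Module 1: themed background from the user's prompt.
        background = background_diffusion(prompt)
        # Module 2: layout/typography conditioned on the background style.
        layout = design_mllm(prompt, background)
        # Module 3: extra stylization for key text elements.
        styled_text = arttext_diffusion(layout, background)
        # The caller composites the three outputs into the final poster.
        return {"background": background, "layout": layout, "text": styled_text}

    # stand-in modules, just to show the data flow
    poster = generate_poster("film noir premiere",
                             lambda p: f"bg({p})",
                             lambda p, bg: f"layout({p},{bg})",
                             lambda t, bg: f"arttext({t},{bg})")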
[4] arXiv:2503.15078 [pdf, html, other]
Title: Fast But Accurate: A Real-Time Hyperelastic Simulator with Robust Frictional Contact
Authors: Ziqiu Zeng, Siyuan Luo, Fan Shi, Zhongkai Zhang
Subjects: Graphics (cs.GR)

We present a GPU-friendly framework for real-time implicit simulation of elastic material in the presence of frictional contacts. The integration of hyperelasticity, non-interpenetration contact, and friction in real-time simulations presents formidable nonlinear and non-smooth problems, which are highly challenging to solve. By incorporating nonlinear complementarity conditions within the local-global framework, we achieve rapid convergence in addressing these challenges. While the structure of local-global methods is not fully GPU-friendly, our simple yet efficient solver with a sparse representation of the system inverse enables highly parallel computing while maintaining a fast convergence rate. Moreover, our novel splitting strategy for non-smooth indicators not only amplifies overall performance but also refines the complementarity preconditioner, enhancing the accuracy of frictional behavior modeling. Extensive experiments validate the robustness of our framework in managing real-time contact scenarios, ranging from large-scale systems and extreme deformations to non-smooth contacts and precise friction interactions. Compatible with a wide range of hyperelastic models, our approach maintains efficiency across both low- and high-stiffness materials. Despite its remarkable efficiency, robustness, and generality, our method is elegantly simple, with its core contributions grounded solely in standard matrix operations.
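The local-global structure the abstract builds on has a standard skeleton: a per-element nonlinear "local" step followed by a prefactored linear "global" solve. A generic sketch of that skeleton (projective-dynamics style; this shows the family of methods, not the paper's specific solver or its complementarity handling):

    import numpy as np
    from scipy.sparse.linalg import factorized

    def local_global_step(x, v, M, L, J, project, h, iters=10):
        """One implicit time step of a generic local-global solver.

        x, v    : (3n,) stacked positions / velocities
        M       : (3n, 3n) sparse mass matrix
        L, J    : constant matrices of the quadratic global energy
        project : local step; per-element nonlinear projection returning
                  stacked target positions p(x) -- embarrassingly parallel
        h       : time step
        """
        x_prev = x
        y = x + h * v                         # inertial prediction
        A = (M / (h * h) + L).tocsc()         # constant system: factor once
        solve = factorized(A)
        rhs_inertia = M @ y / (h * h)
        for _ in range(iters):
            p = project(x)                    # local (nonlinear, parallel)
            x = solve(rhs_inertia + J @ p)    # global (linear solve)
        return x, (x - x_prev) / h            # new positions, velocities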
[5] arXiv:2503.15147 [pdf, html, other]
Title: Diffusion-based G-buffer generation and rendering
Authors: Bowen Xue, Giuseppe Claudio Guarnera, Shuang Zhao, Zahra Montazeri
Subjects: Graphics (cs.GR)

Despite recent advances in text-to-image generation, controlling geometric layout and material properties in synthesized scenes remains challenging. We present a novel pipeline that first produces a G-buffer (albedo, normals, depth, roughness, and metallic) from a text prompt and then renders a final image through a modular neural network. This intermediate representation enables fine-grained editing: users can copy and paste within specific G-buffer channels to insert or reposition objects, or apply masks to the irradiance channel to adjust lighting locally. As a result, real objects can be seamlessly integrated into virtual scenes, and virtual objects can be placed into real environments with high fidelity. By separating scene decomposition from image rendering, our method offers a practical balance between detailed post-generation control and efficient text-driven synthesis. We demonstrate its effectiveness on a variety of examples, showing that G-buffer editing significantly extends the flexibility of text-guided image generation.
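A G-buffer is just a named stack of per-pixel channels, which is what makes the copy-paste edits cheap; a minimal sketch (channel set taken from the abstract, class and method names assumed for illustration):

    import numpy as np

    class GBuffer:
        CHANNELS = {"albedo": 3, "normals": 3, "depth": 1,
                    "roughness": 1, "metallic": 1}

        def __init__(self, h, w):
            self.data = {k: np.zeros((h, w, c), np.float32)
                         for k, c in self.CHANNELS.items()}

        def paste(self, other, src_box, dst_xy, channels=None):
            """Copy a region from another G-buffer into this one, per
            channel -- the 'insert or reposition objects' style of edit."""
            y0, x0, y1, x1 = src_box
            dy, dx = dst_xy
            for k in channels or self.data:
                patch = other.data[k][y0:y1, x0:x1]
                self.data[k][dy:dy + y1 - y0, dx:dx + x1 - x0] = patch

    buf_a, buf_b = GBuffer(256, 256), GBuffer(256, 256)
    buf_a.paste(buf_b, src_box=(10, 10, 74, 74), dst_xy=(100, 100))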
[6] arXiv:2503.15225 [pdf, html, other]
Title: A Personalized Data-Driven Generative Model of Human Motion
Authors: Angelo Di Porzio, Marco Coraggio
Comments: 6 pages, 9 figures
Subjects: Graphics (cs.GR); Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Systems and Control (eess.SY)

The deployment of autonomous virtual avatars (in extended reality) and robots in human group activities, such as rehabilitation therapy, sports, and manufacturing, is expected to increase as these technologies become more pervasive. Designing cognitive architectures and control strategies to drive these agents requires realistic models of human motion. However, existing models provide only simplified descriptions of human motor behavior. In this work, we propose a fully data-driven approach, based on Long Short-Term Memory neural networks, to generate original motion that captures the unique characteristics of specific individuals. We validate the architecture using real data of scalar oscillatory motion. Extensive analyses show that our model effectively replicates the velocity distribution and amplitude envelopes of the individual it was trained on, remains distinct from other individuals, and outperforms state-of-the-art models in similarity to human data.
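For scalar motion the architecture reduces to a small autoregressive LSTM; a minimal PyTorch sketch of that setup (layer sizes and the one-step-ahead rollout are assumptions, not the paper's exact configuration):

    import torch
    import torch.nn as nn

    class MotionLSTM(nn.Module):
        def __init__(self, hidden=64):
            super().__init__()
            self.lstm = nn.LSTM(input_size=1, hidden_size=hidden,
                                batch_first=True)
            self.out = nn.Linear(hidden, 1)

        def forward(self, x, state=None):       # x: (B, T, 1) samples
            h, state = self.lstm(x, state)
            return self.out(h), state           # next-sample prediction

    @torch.no_grad()
    def generate(model, seed, steps):
        """Autoregressively roll out new samples from a seed sequence."""
        out, state = model(seed)                # condition on observed snippet
        sample = out[:, -1:, :]
        traj = []
        for _ in range(steps):
            traj.append(sample)
            sample, state = model(sample, state)
        return torch.cat(traj, dim=1)           # (B, steps, 1)

    model = MotionLSTM()
    print(generate(model, torch.randn(1, 100, 1), steps=50).shape)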
Cross submissions (showing 2 of 2 entries)

[7] arXiv:2503.14526 (cross-list from cs.CV) [pdf, html, other]
Title: ReBot: Scaling Robot Learning with Real-to-Sim-to-Real Robotic Video Synthesis
Authors: Yu Fang, Yue Yang, Xinghao Zhu, Kaiyuan Zheng, Gedas Bertasius, Daniel Szafir, Mingyu Ding
Comments: Website: https://yuffish.github.io/rebot/
Subjects: Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR); Robotics (cs.RO)

Vision-language-action (VLA) models present a promising paradigm by training policies directly on real robot datasets like Open X-Embodiment. However, the high cost of real-world data collection hinders further data scaling, thereby restricting the generalizability of VLAs. In this paper, we introduce ReBot, a novel real-to-sim-to-real approach for scaling real robot datasets and adapting VLA models to target domains, which is the last-mile deployment challenge in robot manipulation. Specifically, ReBot replays real-world robot trajectories in simulation to diversify manipulated objects (real-to-sim), and integrates the simulated movements with inpainted real-world backgrounds to synthesize physically realistic and temporally consistent robot videos (sim-to-real). Our approach has several advantages: 1) it enjoys the benefit of real data to minimize the sim-to-real gap; 2) it leverages the scalability of simulation; and 3) it can generalize a pretrained VLA to a target domain with fully automated data pipelines. Extensive experiments in both simulation and real-world environments show that ReBot significantly enhances the performance and robustness of VLAs. For example, in SimplerEnv with the WidowX robot, ReBot improved the in-domain performance of Octo by 7.2% and OpenVLA by 21.8%, and out-of-domain generalization by 19.9% and 9.4%, respectively. In real-world evaluation with a Franka robot, ReBot increased the success rates of Octo by 17% and OpenVLA by 20%. More information: https://yuffish.github.io/rebot/
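At its core, the sim-to-real half of the pipeline is masked compositing of the simulated render over the inpainted real background; a toy sketch of that one step (the inpainting and temporal-consistency machinery are out of scope here, and the function is illustrative, not ReBot's code):

    import numpy as np

    def composite_frame(sim_rgb, sim_mask, real_bg):
        """Alpha-composite a simulated render over an inpainted real frame.

        sim_rgb  : (H, W, 3) frame rendered from the replayed trajectory
        sim_mask : (H, W)    robot/object alpha from the renderer
        real_bg  : (H, W, 3) real frame with the original robot inpainted out
        """
        alpha = sim_mask[..., None].astype(np.float32)
        return alpha * sim_rgb + (1.0 - alpha) * real_bg

    frame = composite_frame(np.random.rand(480, 640, 3),
                            np.ones((480, 640)),
                            np.random.rand(480, 640, 3))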
[8] arXiv:2503.14573 (cross-list from eess.IV) [pdf, other]
Title: Three-dimensional Reconstruction of the Lumbar Spine with Submillimeter Accuracy Using Biplanar X-ray Images
Authors: Wanxin Yu, Zhemin Zhu, Cong Wang, Yihang Bao, Chunjie Xia, Rongshan Cheng, Yan Yu, Tsung-Yuan Tsai
Comments: 21 pages, 10 figures, 4 tables
Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR)

Three-dimensional reconstruction of the spine under weight-bearing conditions from biplanar X-ray images is of great importance for the clinical assessment of spinal diseases. However, current fully automated reconstruction methods have low accuracy and fail to meet clinical application standards. This study developed and validated a fully automated method for high-accuracy 3D reconstruction of the lumbar spine from biplanar X-ray images. The method involves lumbar decomposition and landmark detection from the raw X-ray images, followed by a deformable model and landmark-weighted 2D-3D registration approach. The reconstruction accuracy was validated against a gold standard obtained through registration of CT-segmented vertebral models with the biplanar X-ray images. The proposed method achieved a 3D reconstruction accuracy of 0.80 mm, a significant improvement over mainstream approaches. This study will contribute to the clinical diagnosis of the lumbar spine in weight-bearing positions.
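Landmark-weighted 2D-3D registration amounts to fitting a rigid pose that minimizes weighted reprojection error in both X-ray views simultaneously; a generic sketch of that objective (small-angle rotation and all names are illustrative simplifications, not the paper's formulation):

    import numpy as np
    from scipy.optimize import least_squares

    def reproj_residuals(pose, pts3d, P_ap, P_lat, obs_ap, obs_lat, w):
        """Weighted biplanar reprojection residuals for one vertebra.

        pose: (6,) rigid params -- 3 rotation (small-angle approximation
              for brevity) + 3 translation. pts3d: (N, 3) model landmarks.
        P_ap, P_lat: (3, 4) projection matrices of the two X-ray views.
        obs_ap, obs_lat: (N, 2) detected landmarks. w: (N,) weights.
        """
        rx, ry, rz, tx, ty, tz = pose
        R = np.array([[1, -rz, ry], [rz, 1, -rx], [-ry, rx, 1]])  # I + [w]x
        X = pts3d @ R.T + np.array([tx, ty, tz])
        Xh = np.hstack([X, np.ones((len(X), 1))])
        res = []
        for P, obs in ((P_ap, obs_ap), (P_lat, obs_lat)):
            uvw = Xh @ P.T
            uv = uvw[:, :2] / uvw[:, 2:3]         # perspective divide
            res.append((w[:, None] * (uv - obs)).ravel())
        return np.concatenate(res)

    # fit = least_squares(reproj_residuals, x0=np.zeros(6),
    #                     args=(pts3d, P_ap, P_lat, obs_ap, obs_lat, w))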
Replacement submissions (showing 2 of 2 entries)

[9] arXiv:2502.19459 (replaced) [pdf, html, other]
Title: ArtGS: Building Interactable Replicas of Complex Articulated Objects via Gaussian Splatting
Authors: Yu Liu, Baoxiong Jia, Ruijie Lu, Junfeng Ni, Song-Chun Zhu, Siyuan Huang
Subjects: Graphics (cs.GR); Machine Learning (cs.LG); Robotics (cs.RO)

Building articulated objects is a key challenge in computer vision. Existing methods often fail to effectively integrate information across different object states, limiting the accuracy of part-mesh reconstruction and part dynamics modeling, particularly for complex multi-part articulated objects. We introduce ArtGS, a novel approach that leverages 3D Gaussians as a flexible and efficient representation to address these issues. Our method incorporates canonical Gaussians with coarse-to-fine initialization and updates for aligning articulated part information across different object states, and employs a skinning-inspired part dynamics modeling module to improve both part-mesh reconstruction and articulation learning. Extensive experiments on both synthetic and real-world datasets, including a new benchmark for complex multi-part objects, demonstrate that ArtGS achieves state-of-the-art performance in joint parameter estimation and part mesh reconstruction. Our approach significantly improves reconstruction quality and efficiency, especially for multi-part articulated objects. Additionally, we provide comprehensive analyses of our design choices, validating the effectiveness of each component and highlighting potential areas for future improvement. Our work is publicly available at: https://articulate-gs.github.io
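One way to read "skinning-inspired part dynamics" is linear-blend-skinning applied to Gaussian centers: each Gaussian carries soft part-assignment weights and moves with a blend of per-part rigid transforms. A sketch under that reading (an assumed interpretation, not ArtGS's released code):

    import numpy as np

    def skin_gaussians(means, weights, part_transforms):
        """Move Gaussian centers by blending per-part rigid transforms.

        means:           (N, 3) canonical Gaussian centers
        weights:         (N, P) soft part-assignment weights, rows sum to 1
        part_transforms: (P, 4, 4) rigid transform of each articulated part
        """
        homo = np.hstack([means, np.ones((len(means), 1))])          # (N, 4)
        per_part = np.einsum('pij,nj->npi', part_transforms, homo)   # (N, P, 4)
        blended = np.einsum('np,npi->ni', weights, per_part)         # (N, 4)
        return blended[:, :3]

    means = np.random.rand(5, 3)
    w = np.full((5, 2), 0.5)                 # two parts, equal weights
    T = np.stack([np.eye(4), np.eye(4)])
    print(skin_gaussians(means, w, T).shape)  # (5, 3)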
[10] arXiv:2412.01537 (replaced) [pdf, html, other]
Title: HandOS: 3D Hand Reconstruction in One Stage
Authors: Xingyu Chen, Zhuheng Song, Xiaoke Jiang, Yaoqing Hu, Junzhi Yu, Lei Zhang
Subjects: Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR)

Existing approaches to hand reconstruction predominantly adhere to a multi-stage framework encompassing detection, left-right classification, and pose estimation, a paradigm that induces redundant computation and cumulative errors. In this work, we propose HandOS, an end-to-end framework for 3D hand reconstruction. Our central idea is to leverage a frozen detector as the foundation while incorporating auxiliary modules for 2D and 3D keypoint estimation. In this manner, we integrate pose estimation capacity into the detection framework while obviating the need for the left-right category as a prerequisite. Specifically, we propose an interactive 2D-3D decoder, where 2D joint semantics are derived from detection cues and 3D representations are lifted from those of the 2D joints. Furthermore, hierarchical attention enables concurrent modeling of 2D joints, 3D vertices, and camera translation. Consequently, we achieve end-to-end integration of hand detection, 2D pose estimation, and 3D mesh reconstruction within a one-stage framework, overcoming the drawbacks of multi-stage pipelines. HandOS reaches state-of-the-art performance on public benchmarks, e.g., 5.0 PA-MPJPE on FreiHand and 64.6% PCK@0.05 on HInt-Ego4D. Project page: http://idea-research.github.io/HandOSweb
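A common shape for a 2D-to-3D lifting decoder is a set of learnable 3D joint queries cross-attending to 2D joint features; a compact sketch in that spirit (an assumed reading of the "interactive 2D-3D decoder"; dimensions and names are made up for illustration):

    import torch
    import torch.nn as nn

    class Lift2Dto3D(nn.Module):
        """Learnable 3D joint queries attend to 2D joint features."""
        def __init__(self, dim=256, joints=21):
            super().__init__()
            self.q3d = nn.Parameter(torch.randn(joints, dim))  # 3D queries
            self.attn = nn.MultiheadAttention(dim, num_heads=8,
                                              batch_first=True)
            self.head = nn.Linear(dim, 3)                      # per-joint xyz

        def forward(self, feats2d):            # feats2d: (B, joints, dim)
            B = feats2d.shape[0]
            q = self.q3d.unsqueeze(0).expand(B, -1, -1)
            lifted, _ = self.attn(q, feats2d, feats2d)   # cross-attention
            return self.head(lifted)                     # (B, joints, 3)

    out = Lift2Dto3D()(torch.randn(2, 21, 256))   # -> torch.Size([2, 21, 3])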
