Graphics
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Graphics</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item11">Cross-lists</a></li> <li><a href="#item15">Replacements</a></li> </ul> <p>See <a id="recent-cs.GR" aria-labelledby="recent-cs.GR" href="/list/cs.GR/recent">recent</a> articles</p> <h3>Showing new listings for Friday, 21 March 2025</h3> <div class='paging'>Total of 18 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href=/list/cs.GR/new?skip=0&show=1000 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 10 of 10 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.15557" title="Abstract" id="2503.15557"> arXiv:2503.15557 </a> [<a href="/pdf/2503.15557" title="Download PDF" id="pdf-2503.15557" aria-labelledby="pdf-2503.15557">pdf</a>, <a href="https://arxiv.org/html/2503.15557v1" title="View HTML" id="html-2503.15557" aria-labelledby="html-2503.15557" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15557" title="Other formats" id="oth-2503.15557" aria-labelledby="oth-2503.15557">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Motion Synthesis with Sparse and Flexible Keyjoint Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hwang,+I">Inwoo Hwang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bae,+J">Jinseok Bae</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+D">Donggeun Lim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+Y+M">Young Min Kim</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, Project Page: <a href="http://inwoohwang.me/SFControl" rel="external noopener nofollow" class="link-external link-http">this http URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Graphics (cs.GR)</span>; Computer Vision and Pattern Recognition (cs.CV); Robotics (cs.RO) </div> <p class='mathjax'> Creating expressive character animations is labor-intensive, requiring intricate manual adjustment of animators across space and time. 
Previous works on controllable motion generation often rely on a predefined set of dense spatio-temporal specifications (e.g., dense pelvis trajectories with exact per-frame timing), limiting practicality for animators. To support high-level intent and intuitive control in diverse scenarios, we propose a practical controllable motion synthesis framework that respects sparse and flexible keyjoint signals. Our approach employs a decomposed diffusion-based motion synthesis framework that first synthesizes keyjoint movements from sparse input control signals and then synthesizes full-body motion based on the completed keyjoint trajectories. The low-dimensional keyjoint movements can easily adapt to various control signal types, such as end-effector positions for diverse goal-driven motion synthesis, or incorporate functional constraints on a subset of keyjoints. Additionally, we introduce a time-agnostic control formulation, eliminating the need for frame-specific timing annotations and enhancing control flexibility. The shared second stage then synthesizes a natural whole-body motion that precisely satisfies the task requirement from the dense keyjoint movements. We demonstrate the effectiveness of sparse and flexible keyjoint control through comprehensive experiments on diverse datasets and scenarios.

[2] arXiv:2503.15562
Title: Shap-MeD
Authors: Nicolás Laverde, Melissa Robles, Johan Rodríguez
Subjects: Graphics (cs.GR); Computational Engineering, Finance, and Science (cs.CE); Computer Vision and Pattern Recognition (cs.CV)

We present Shap-MeD, a text-to-3D object generative model specialized in the biomedical domain. The objective of this study is to develop an assistant that facilitates the 3D modeling of medical objects, thereby reducing development time. 3D modeling in medicine has various applications, including surgical procedure simulation and planning, the design of personalized prosthetic implants, medical education, the creation of anatomical models, and the development of research prototypes. To achieve this, we leverage Shap-E, an open-source text-to-3D generative model developed by OpenAI, and fine-tune it using a dataset of biomedical objects. Our model achieved a mean squared error (MSE) of 0.089 in latent generation on the evaluation set, compared to Shap-E's MSE of 0.147. Additionally, we conducted a qualitative evaluation, comparing our model with others in the generation of biomedical objects. Our results indicate that Shap-MeD demonstrates higher structural accuracy in biomedical object generation.

[3] arXiv:2503.15586
Title: How to Train Your Dragon: Automatic Diffusion-Based Rigging for Characters with Diverse Topologies
Authors: Zeqi Gu, Difan Liu, Timothy Langlois, Matthew Fisher, Abe Davis
Comments: Accepted to Eurographics 2025
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Recent diffusion-based methods have achieved impressive results on animating images of human subjects. However, most of that success has built on human-specific body pose representations and extensive training with labeled real videos. In this work, we extend the ability of such models to animate images of characters with more diverse skeletal topologies. Given a small number (3-5) of example frames showing the character in different poses with corresponding skeletal information, our model quickly infers a rig for that character that can generate images corresponding to new skeleton poses. We propose a procedural data generation pipeline that efficiently samples training data with diverse topologies on the fly. We use it, along with a novel skeleton representation, to train our model on articulated shapes spanning a large space of textures and topologies. Then during fine-tuning, our model rapidly adapts to unseen target characters and generalizes well to rendering new poses, both for realistic and more stylized cartoon appearances. To better evaluate performance on this novel and challenging task, we create the first 2D video dataset that contains both humanoid and non-humanoid subjects with per-frame keypoint annotations. With extensive experiments, we demonstrate the superior quality of our results.
Project page: https://traindragondiffusion.github.io/

[4] arXiv:2503.15809
Title: Controlling Avatar Diffusion with Learnable Gaussian Embedding
Authors: Xuan Gao, Jingtao Zhou, Dongyu Liu, Yuqi Zhou, Juyong Zhang
Comments: Project Page: https://ustc3dv.github.io/Learn2Control/
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Recent advances in diffusion models have made significant progress in digital human generation. However, most existing models still struggle to maintain 3D consistency, temporal coherence, and motion accuracy. A key reason for these shortcomings is the limited representation ability of commonly used control signals (e.g., landmarks, depth maps, etc.). In addition, the lack of diversity in identity and pose variations in public datasets further hinders progress in this area. In this paper, we analyze the shortcomings of current control signals and introduce a novel control signal representation that is optimizable, dense, expressive, and 3D consistent. Our method embeds a learnable neural Gaussian onto a parametric head surface, which greatly enhances the consistency and expressiveness of diffusion-based head models. Regarding the dataset, we synthesize a large-scale dataset with multiple poses and identities. In addition, we use real/synthetic labels to effectively distinguish real and synthetic data, minimizing the impact of imperfections in synthetic data on the generated head images. Extensive experiments show that our model outperforms existing methods in terms of realism, expressiveness, and 3D consistency.
Our code, synthetic datasets, and pre-trained models will be released on our project page: https://ustc3dv.github.io/Learn2Control/

[5] arXiv:2503.15996
Title: Animating the Uncaptured: Humanoid Mesh Animation with Video Diffusion Models
Authors: Marc Benedí San Millán, Angela Dai, Matthias Nießner
Comments: 16 pages, 10 figures
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Animation of humanoid characters is essential in various graphics applications, but requires significant time and cost to create realistic animations. We propose an approach to synthesize 4D animated sequences of input static 3D humanoid meshes, leveraging strong generalized motion priors from generative video models, since such video models contain powerful motion information covering a wide variety of human motions. From an input static 3D humanoid mesh and a text prompt describing the desired animation, we synthesize a corresponding video conditioned on a rendered image of the 3D mesh. We then employ an underlying SMPL representation to animate the corresponding 3D mesh according to the video-generated motion, based on our motion optimization. This provides a cost-effective and accessible solution for synthesizing diverse and realistic 4D animations.

[6] arXiv:2503.16129
Title: Controllable Segmentation-Based Text-Guided Style Editing
Authors: Jingwen Li, Aravind Chandrasekar, Mariana Rocha, Chao Li, Yuqing Chen
Subjects: Graphics (cs.GR)

We present a novel approach for controllable, region-specific style editing driven by textual prompts. Building upon the state-space style alignment framework introduced by StyleMamba, our method integrates a semantic segmentation model into the style transfer pipeline. This allows users to selectively apply text-driven style changes to specific segments (e.g., "turn the building into a cyberpunk tower") while leaving other regions (e.g., "people" or "trees") unchanged. By incorporating region-wise condition vectors and a region-specific directional loss, our method achieves high-fidelity transformations that respect both semantic boundaries and user-driven style descriptions. Extensive experiments demonstrate that our approach can flexibly handle complex scene stylizations in real-world scenarios, improving control and quality over purely global style transfer methods.
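As a minimal sketch of the region-gating idea described in this abstract (not the paper's actual pipeline), a segmentation map can restrict a text-driven stylizer to selected classes while every other pixel is passed through unchanged; stylize_fn and the class labels below are hypothetical placeholders.

    import numpy as np

    def region_style_edit(image, seg_labels, target_classes, stylize_fn):
        """Apply a (hypothetical) text-driven stylizer only inside selected
        semantic regions; pixels outside the mask are left untouched.

        image:          float array (H, W, 3) in [0, 1]
        seg_labels:     int array (H, W), one class id per pixel
        target_classes: iterable of class ids that should receive the new style
        stylize_fn:     callable image -> stylized image of the same shape
        """
        stylized = stylize_fn(image)                      # global stylization
        mask = np.isin(seg_labels, list(target_classes))  # select target regions
        mask = mask[..., None].astype(image.dtype)        # broadcast over RGB
        return mask * stylized + (1.0 - mask) * image     # region-wise composite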

[7] arXiv:2503.16133
Title: Multi-Prompt Style Interpolation for Fine-Grained Artistic Control
Authors: Lei Chen, Hao Li, Yuxin Zhang, Chao Li, Kai Wen
Subjects: Graphics (cs.GR)

Text-driven image style transfer has seen remarkable progress with methods leveraging cross-modal embeddings for fast, high-quality stylization. However, most existing pipelines assume a single textual style prompt, limiting the range of artistic control and expressiveness. In this paper, we propose a novel multi-prompt style interpolation framework that extends the recently introduced StyleMamba approach. Our method supports blending or interpolating among multiple textual prompts (e.g., "cubism," "impressionism," and "cartoon"), allowing the creation of nuanced or hybrid artistic styles within a single image. We introduce a Multi-Prompt Embedding Mixer combined with Adaptive Blending Weights to enable fine-grained control over the spatial and semantic influence of each style. Further, we propose a Hierarchical Masked Directional Loss to refine region-specific style consistency. Experiments and user studies confirm our approach outperforms single-prompt baselines and naive linear combinations of styles, achieving superior style fidelity, text-image alignment, and artistic flexibility, all while maintaining the computational efficiency offered by the state-space formulation.
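The prompt-blending idea can be illustrated with a generic sketch: several style-prompt embeddings are combined under normalized weights into a single conditioning vector. This is an assumption-level illustration (any text encoder could produce the embeddings), not the paper's Multi-Prompt Embedding Mixer.

    import numpy as np

    def blend_prompt_embeddings(prompt_embeddings, weights):
        """Blend several style-prompt embeddings into one conditioning vector.

        prompt_embeddings: array (P, D), one row per prompt
        weights:           array (P,), relative strength of each style
        """
        w = np.clip(np.asarray(weights, dtype=np.float64), 0.0, None)
        w = w / w.sum()                               # convex combination
        mixed = (w[:, None] * prompt_embeddings).sum(axis=0)
        return mixed / np.linalg.norm(mixed)          # unit-norm conditioning vector

    # e.g. 60% "cubism", 30% "impressionism", 10% "cartoon":
    # mixed = blend_prompt_embeddings(embeds, [0.6, 0.3, 0.1])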

[8] arXiv:2503.16177
Title: OccluGaussian: Occlusion-Aware Gaussian Splatting for Large Scene Reconstruction and Rendering
Authors: Shiyong Liu, Xiao Tang, Zhihao Li, Yingfan He, Chongjie Ye, Jianzhuang Liu, Binxiao Huang, Shunbo Zhou, Xiaofei Wu
Comments: Project website: https://occlugaussian.github.io
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

In large-scale scene reconstruction using 3D Gaussian splatting, it is common to partition the scene into multiple smaller regions and reconstruct them individually. However, existing division methods are occlusion-agnostic, meaning that each region may contain areas with severe occlusions. As a result, the cameras within those regions are less correlated, leading to a low average contribution to the overall reconstruction. In this paper, we propose an occlusion-aware scene division strategy that clusters training cameras based on their positions and co-visibilities to acquire multiple regions. Cameras in such regions exhibit stronger correlations and a higher average contribution, facilitating high-quality scene reconstruction. We further propose a region-based rendering technique to accelerate large scene rendering, which culls Gaussians invisible to the region where the viewpoint is located. Such a technique significantly speeds up the rendering without compromising quality. Extensive experiments on multiple large scenes show that our method achieves superior reconstruction results with faster rendering speed compared to existing state-of-the-art approaches. Project page: https://occlugaussian.github.io.
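One way to picture the occlusion-aware division described above is to cluster cameras with an affinity that mixes spatial proximity and co-visibility, so cameras separated by occluders end up in different regions. The sketch below assumes camera centers and a normalized co-visibility matrix (e.g., from shared SfM points) are available; it is one plausible formulation, not the authors' exact strategy.

    import numpy as np
    from sklearn.cluster import SpectralClustering

    def divide_scene_by_covisibility(cam_positions, covisibility, n_regions, sigma=10.0):
        """Cluster training cameras into regions using spatial proximity and
        co-visibility, so cameras in a region tend to observe the same content.

        cam_positions: (N, 3) camera centers
        covisibility:  (N, N) symmetric matrix in [0, 1], e.g. fraction of shared
                       3D points between each camera pair
        """
        d2 = ((cam_positions[:, None, :] - cam_positions[None, :, :]) ** 2).sum(-1)
        proximity = np.exp(-d2 / (2.0 * sigma ** 2))   # spatial affinity
        affinity = proximity * covisibility            # down-weight occluded pairs
        labels = SpectralClustering(
            n_clusters=n_regions, affinity="precomputed",
            assign_labels="kmeans", random_state=0,
        ).fit_predict(affinity)
        return labels                                  # region id per camera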

[9] arXiv:2503.16406
Title: VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness
Authors: SeungJu Cha, Kwanyoung Lee, Ye-Chan Kim, Hyunwoo Oh, Dong-Jin Kim
Comments: Accepted at CVPR 2025, code: https://github.com/SeungJuCha/VerbDiff.git
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV); Multimedia (cs.MM)

Recent large-scale text-to-image diffusion models generate photorealistic images but often struggle to accurately depict interactions between humans and objects due to their limited ability to differentiate various interaction words. In this work, we propose VerbDiff to address the challenge of capturing nuanced interactions within text-to-image diffusion models. VerbDiff is a novel text-to-image generation model that weakens the bias between interaction words and objects, enhancing the understanding of interactions. Specifically, we disentangle various interaction words from frequency-based anchor words and leverage localized interaction regions from generated images to help the model better capture semantics in distinctive words without extra conditions. Our approach enables the model to accurately understand the intended interaction between humans and objects, producing high-quality images with accurate interactions aligned with specified verbs. Extensive experiments on the HICO-DET dataset demonstrate the effectiveness of our method compared to previous approaches.

[10] arXiv:2503.16424
Title: Bézier Splatting for Fast and Differentiable Vector Graphics
Authors: Xi Liu, Chaoyi Zhou, Nanxuan Zhao, Siyu Huang
Subjects: Graphics (cs.GR); Computer Vision and Pattern Recognition (cs.CV)

Differentiable vector graphics (VGs) are widely used in image vectorization and vector synthesis, but existing representations are costly to optimize and struggle to achieve high-quality rendering results for high-resolution images. This work introduces a new differentiable VG representation, dubbed Bézier splatting, that enables fast yet high-fidelity VG rasterization. Bézier splatting samples 2D Gaussians along Bézier curves, which naturally provide positional gradients at object boundaries. Thanks to the efficient splatting-based differentiable rasterizer, Bézier splatting achieves over 20x faster forward and 150x faster backward rasterization steps for open curves compared to DiffVG. Additionally, we introduce an adaptive pruning and densification strategy that dynamically adjusts the spatial distribution of curves to escape local minima, further improving VG quality. Experimental results show that Bézier splatting significantly outperforms existing methods with better visual fidelity and 10x faster optimization speed.
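A minimal sketch of the core construction mentioned above, sampling 2D Gaussian centers along a cubic Bézier curve, is shown below. The sample count and the idea of using curve tangents to orient anisotropic splats are illustrative assumptions, not the paper's implementation.

    import numpy as np

    def sample_gaussian_centers(p0, p1, p2, p3, n_samples=32):
        """Place 2D Gaussian centers along a cubic Bezier curve.

        p0..p3: (2,) control points. Returns (n_samples, 2) centers and the
        curve tangents, which could orient anisotropic Gaussians.
        """
        p0, p1, p2, p3 = map(np.asarray, (p0, p1, p2, p3))
        t = np.linspace(0.0, 1.0, n_samples)[:, None]
        centers = ((1 - t) ** 3 * p0 + 3 * (1 - t) ** 2 * t * p1
                   + 3 * (1 - t) * t ** 2 * p2 + t ** 3 * p3)
        tangents = (3 * (1 - t) ** 2 * (p1 - p0) + 6 * (1 - t) * t * (p2 - p1)
                    + 3 * t ** 2 * (p3 - p2))
        return centers, tangents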
Cross submissions (showing 4 of 4 entries)

[11] arXiv:2503.15505 (cross-list from cs.HC)
Title: Sensitivity to Redirected Walking Considering Gaze, Posture, and Luminance
Authors: Niall L. Williams, Logan C. Stevens, Aniket Bera, Dinesh Manocha
Comments: Accepted for publication in TVCG, proceedings of IEEE VR 2025 conference
Subjects: Human-Computer Interaction (cs.HC); Graphics (cs.GR)

We study the correlations between redirected walking (RDW) rotation gains and patterns in users' posture and gaze data during locomotion in virtual reality (VR). To do this, we conducted a psychophysical experiment to measure users' sensitivity to RDW rotation gains and collect gaze and posture data during the experiment. Using multilevel modeling, we studied how different factors of the VR system and user affected their physiological signals. In particular, we studied the effects of redirection gain, trial duration, trial number (i.e., time spent in VR), and participant gender on postural sway, gaze velocity (a proxy for gaze stability), and saccade and blink rate. Our results showed that, in general, physiological signals were significantly positively correlated with the strength of redirection gain, the duration of trials, and the trial number. Gaze velocity was negatively correlated with trial duration. Additionally, we measured users' sensitivity to rotation gains in well-lit (photopic) and dimly-lit (mesopic) virtual lighting conditions. Results showed that there were no significant differences in RDW detection thresholds between the photopic and mesopic luminance conditions.

[12] arXiv:2503.15506 (cross-list from cs.HC)
Title: Effectiveness of machining equipment user guides: A comparative study of augmented reality and traditional media
Authors: Mina Ghobrial (INSA Toulouse, INUC), Philippe Seitier, Pierre Lagarrigue (ICA), Michel Galaup, Patrick Gilles (ICA)
Journal-ref: Material Forming, Apr 2024, Toulouse, France, pp. 2320-2328
Subjects: Human-Computer Interaction (cs.HC); Graphics (cs.GR)

In the rapidly evolving landscape of manufacturing and material forming, innovative strategies are imperative for maintaining a competitive edge. Augmented Reality (AR) has emerged as a groundbreaking technology, offering new dimensions in how information is displayed and interacted with.
It holds particular promise for instructional guides for complex machinery, potentially enhancing traditional methods of knowledge transfer and operator training. Material forming, a key discipline within mechanical engineering, requires high precision and skill, making it an ideal candidate for the integration of advanced instructional technologies like AR. This study aims to explore the efficiency of three distinct types of user manual (video, paper, and augmented reality) on performance and acceptability in a material forming workshop environment. The focus will be on how AR can be specifically applied to improve task execution and understanding in material forming operations. Participants are mechanical engineering students specializing in material forming. They will engage in a series of standardized tasks related to machining processes. Performance will be gauged by metrics like task completion time and error rates, while task load will be assessed via the NASA Task Load Index (NASA-TLX) [1]. Acceptability of each manual type will be evaluated using the System Usability Scale (SUS) [2]. By comparing these various instructional formats, this research seeks to shed light on the most effective mediums for enhancing both operator performance and experience.

[13] arXiv:2503.15507 (cross-list from cs.HC)
Title: CvhSlicer 2.0: Immersive and Interactive Visualization of Chinese Visible Human Data in XR Environments
Authors: Yue Qiu, Yuqi Tong, Yu Zhang, Qixuan Liu, Jialun Pei, Shi Qiu, Pheng-Ann Heng, Chi-Wing Fu
Comments: IEEE VR 2025 Posters
Subjects: Human-Computer Interaction (cs.HC); Graphics (cs.GR); Multimedia (cs.MM)

The study of human anatomy through advanced visualization techniques is crucial for medical research and education. In this work, we introduce CvhSlicer 2.0, an innovative XR system designed for immersive and interactive visualization of the Chinese Visible Human (CVH) dataset. Particularly, our proposed system operates entirely on a commercial XR headset, offering a range of visualization and interaction tools for dynamic 2D and 3D data exploration.
Comprehensive evaluations show that CvhSlicer 2.0 demonstrates strong capabilities in visualizing anatomical data, enhancing user engagement, and improving educational effectiveness. A demo video is available at https://youtu.be/CfR72S_0N-4

[14] arXiv:2503.16135 (cross-list from cs.HC)
Title: The Malleable Glyph (Challenge)
Authors: Adam Herout, Vojtěch Bartl, Martin Gaens, Oskar Tvrďoch
Subjects: Human-Computer Interaction (cs.HC); Graphics (cs.GR)

Malleable Glyph is a new visualization problem and a public challenge. It originated from UX research (namely from research on card sorting UX), but its applications can be diverse (UI, gaming, information presentation, maps, and others). Its essence is carrying as much information in a defined planar and static area as possible. The information should allow human observers to evaluate a pair of glyphs into one of three possible orderings: the first is "greater", the second is "greater", or both are equal. The glyphs should adhere to the Illiteracy Rule; in other words, the observer should ask themselves the question "how much?" rather than "how many?". This article motivates the technique, explains its details, and presents the public challenge, including the evaluation protocol. The article aims to call for ideas from other visualization and graphics researchers and practitioners and to invite everyone to participate in the challenge and, by doing so, move scientific knowledge forward.
Replacement submissions (showing 4 of 4 entries)

[15] arXiv:2212.11715 (replaced)
Title: GeoCode: Interpretable Shape Programs
Authors: Ofek Pearl, Itai Lang, Yuhua Hu, Raymond A. Yeh, Rana Hanocka
Comments: Project page: https://threedle.github.io/GeoCode/
Subjects: Graphics (cs.GR)

The task of crafting procedural programs capable of generating structurally valid 3D shapes easily and intuitively remains an elusive goal in computer vision and graphics. Within the graphics community, generating procedural 3D models has shifted to using node graph systems. They allow the artist to create complex shapes and animations through visual programming. Being a high-level design tool, they have made procedural 3D modeling more accessible. However, crafting those node graphs demands expertise and training. We present GeoCode, a novel framework designed to extend an existing node graph system and significantly lower the bar for the creation of new procedural 3D shape programs. Our approach meticulously balances expressiveness and generalization for part-based shapes. We propose a curated set of new geometric building blocks that are expressive and reusable across domains. We showcase three innovative and expressive programs developed through our technique and geometric building blocks. Our programs enforce intricate rules, empowering users to execute intuitive high-level parameter edits that seamlessly propagate throughout the entire shape at a lower level while maintaining its validity. To evaluate the user-friendliness of our geometric building blocks among non-experts, we conducted a user study that demonstrates their ease of use and highlights their applicability across diverse domains. Empirical evidence shows the superior accuracy of GeoCode in inferring and recovering 3D shapes compared to an existing competitor. Furthermore, our method demonstrates superior expressiveness compared to alternatives that utilize coarse primitives. Notably, we illustrate the ability to execute controllable local and global shape manipulations.

[16] arXiv:2411.19942 (replaced)
Title: FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling
Authors: Hang Ye, Xiaoxuan Ma, Hai Ci, Wentao Zhu, Yizhou Wang
Comments: 23 pages, 26 figures
Subjects: Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR); Machine Learning (cs.LG)

Achieving realistic animated human avatars requires accurate modeling of pose-dependent clothing deformations. Existing learning-based methods heavily rely on the Linear Blend Skinning (LBS) of minimally-clothed human models like SMPL to model deformation. However, they struggle to handle loose clothing, such as long dresses, where the canonicalization process becomes ill-defined when the clothing is far from the body, leading to disjointed and fragmented results. To overcome this limitation, we propose FreeCloth, a novel hybrid framework to model challenging clothed humans. Our core idea is to use dedicated strategies to model different regions, depending on whether they are close to or distant from the body. Specifically, we segment the human body into three categories: unclothed, deformed, and generated. We simply replicate unclothed regions that require no deformation. For deformed regions close to the body, we leverage LBS to handle the deformation. As for the generated regions, which correspond to loose clothing areas, we introduce a novel free-form, part-aware generator to model them, as they are less affected by movements. This free-form generation paradigm brings enhanced flexibility and expressiveness to our hybrid framework, enabling it to capture the intricate geometric details of challenging loose clothing, such as skirts and dresses. Experimental results on the benchmark dataset featuring loose clothing demonstrate that FreeCloth achieves state-of-the-art performance with superior visual fidelity and realism, particularly in the most challenging cases.
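The region routing described in this abstract can be pictured as a simple dispatch over per-point labels: unclothed points are copied, deformed points go through skinning, and loose-clothing geometry comes from a generator. The lbs_deform and freeform_generate callables below are hypothetical placeholders for the paper's components.

    import numpy as np

    def assemble_clothed_surface(points, labels, lbs_deform, freeform_generate, pose):
        """Route each body region through the strategy suggested by its label.

        points:            (N, 3) canonical surface points
        labels:            (N,) strings in {"unclothed", "deformed", "generated"}
        lbs_deform:        callable (points, pose) -> posed points (skinning)
        freeform_generate: callable (pose) -> (M, 3) loose-clothing points
        """
        labels = np.asarray(labels)
        unclothed = points[labels == "unclothed"]                   # copied as-is
        deformed = lbs_deform(points[labels == "deformed"], pose)   # close to body
        generated = freeform_generate(pose)                         # loose clothing
        return np.concatenate([unclothed, deformed, generated], axis=0)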

[17] arXiv:2501.13928 (replaced)
Title: Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass
Authors: Jianing Yang, Alexander Sax, Kevin J. Liang, Mikael Henaff, Hao Tang, Ang Cao, Joyce Chai, Franziska Meier, Matt Feiszli
Comments: CVPR 2025. Project website: https://fast3r-3d.github.io/
Subjects: Computer Vision and Pattern Recognition (cs.CV); Artificial Intelligence (cs.AI); Graphics (cs.GR); Robotics (cs.RO)

Multi-view 3D reconstruction remains a core challenge in computer vision, particularly in applications requiring accurate and scalable representations across diverse perspectives. Current leading methods such as DUSt3R employ a fundamentally pairwise approach, processing images in pairs and necessitating costly global alignment procedures to reconstruct from multiple views. In this work, we propose Fast 3D Reconstruction (Fast3R), a novel multi-view generalization to DUSt3R that achieves efficient and scalable 3D reconstruction by processing many views in parallel. Fast3R's Transformer-based architecture forwards N images in a single forward pass, bypassing the need for iterative alignment. Through extensive experiments on camera pose estimation and 3D reconstruction, Fast3R demonstrates state-of-the-art performance, with significant improvements in inference speed and reduced error accumulation. These results establish Fast3R as a robust alternative for multi-view applications, offering enhanced scalability without compromising reconstruction accuracy.
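A minimal sketch of the "all views in one forward pass" idea follows: patch tokens from N images are tagged with per-view embeddings, concatenated into one sequence, and processed jointly by a Transformer so attention spans every view at once. The dimensions and the per-patch 3D head are illustrative assumptions, not Fast3R's architecture.

    import torch
    import torch.nn as nn

    class MultiViewEncoder(nn.Module):
        """Process all N views jointly in a single Transformer forward pass."""
        def __init__(self, dim=256, patch=16, n_heads=8, depth=4, max_views=64):
            super().__init__()
            self.patchify = nn.Conv2d(3, dim, kernel_size=patch, stride=patch)
            self.view_embed = nn.Embedding(max_views, dim)   # distinguishes views
            layer = nn.TransformerEncoderLayer(d_model=dim, nhead=n_heads,
                                               batch_first=True)
            self.encoder = nn.TransformerEncoder(layer, num_layers=depth)
            self.head = nn.Linear(dim, 3)                    # e.g. per-patch 3D point

        def forward(self, images):                           # images: (N, 3, H, W)
            tokens = self.patchify(images).flatten(2).transpose(1, 2)  # (N, T, dim)
            n, t, d = tokens.shape
            view_ids = torch.arange(n, device=images.device)
            tokens = tokens + self.view_embed(view_ids)[:, None, :]
            tokens = tokens.reshape(1, n * t, d)             # one sequence, all views
            feats = self.encoder(tokens)                     # attention across views
            return self.head(feats).reshape(n, t, 3)         # coarse point per patch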

[18] arXiv:2503.12552 (replaced)
Title: MTGS: Multi-Traversal Gaussian Splatting
Authors: Tianyu Li, Yihang Qiu, Zhenhua Wu, Carl Lindström, Peng Su, Matthias Nießner, Hongyang Li
Subjects: Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR)

Multi-traversal data, commonly collected through daily commutes or by self-driving fleets, provides multiple viewpoints for scene reconstruction within a road block. This data offers significant potential for high-quality novel view synthesis, which is crucial for applications such as autonomous vehicle simulators. However, inherent challenges in multi-traversal data often result in suboptimal reconstruction quality, including variations in appearance and the presence of dynamic objects. To address these issues, we propose Multi-Traversal Gaussian Splatting (MTGS), a novel approach that reconstructs high-quality driving scenes from arbitrarily collected multi-traversal data by modeling a shared static geometry while separately handling dynamic elements and appearance variations. Our method employs a multi-traversal dynamic scene graph with a shared static node and traversal-specific dynamic nodes, complemented by color correction nodes with learnable spherical harmonics coefficient residuals. This approach enables high-fidelity novel view synthesis and provides flexibility to navigate any viewpoint. We conduct extensive experiments on a large-scale driving dataset, nuPlan, with multi-traversal data. Our results demonstrate that MTGS improves LPIPS by 23.5% and geometry accuracy by 46.3% compared to single-traversal baselines. The code and data will be made available to the public.
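The scene-graph layout described in this abstract (one shared static node, per-traversal dynamic nodes, and per-traversal color-correction residuals on the spherical-harmonics coefficients) can be sketched as a small data structure. Field names and shapes are illustrative assumptions, not the released code.

    from dataclasses import dataclass, field
    import numpy as np

    @dataclass
    class GaussianNode:
        means: np.ndarray        # (G, 3) Gaussian centers
        sh_coeffs: np.ndarray    # (G, K, 3) spherical-harmonics color coefficients

    @dataclass
    class TraversalNodes:
        dynamic: list            # one GaussianNode per moving object in this traversal
        sh_residual: np.ndarray  # (K, 3) learnable color-correction residual

    @dataclass
    class MultiTraversalSceneGraph:
        static: GaussianNode                            # geometry shared by all traversals
        traversals: dict = field(default_factory=dict)  # traversal id -> TraversalNodes

        def colors_for(self, traversal_id):
            """Shared static SH coefficients corrected for one traversal's appearance."""
            res = self.traversals[traversal_id].sh_residual
            return self.static.sh_coeffs + res[None, :, :]   # broadcast over Gaussians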