Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <!-- MathJax 2 tex2jax delimiters: inline math is $...$ or \(...\); display math is $$...$$ or \[...\]. Only elements whose class matches processClass are typeset. --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <!-- Loaded over https like every other static.arxiv.org asset on this page --> <script src='https://static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 130 results for author: <span class="mathjax">Cao, W</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <!-- Main search form, scoped to the cs archive. role="search" exposes it as a search landmark; "aria-role" is not a valid attribute and is ignored by assistive technology. --> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. 
<a href="/search/?searchtype=author&query=Cao%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cao, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cao%2C+W&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> 
<div class="level-left"> <form method="GET" action="/search/"> <!-- Hidden copies of the search fields, kept inside this form so they are submitted together with the size/order controls. Their ids carry a "sort-" prefix because the visible search form already uses searchtype, query, abstracts-0 and abstracts-1, and ids must be unique within the document. The name attributes (the submitted keys) are unchanged. --> <div style="display: none;"> <select id="sort-searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="sort-query" name="query" type="text" value="Cao, W"> <ul id="abstracts"><li><input checked id="sort-abstracts-0" name="abstracts" type="radio" value="show"> <label for="sort-abstracts-0">Show abstracts</label></li><li><input id="sort-abstracts-1" name="abstracts" type="radio" value="hide"> <label for="sort-abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Cao%2C+W&start=50" class="pagination-next" >Next </a> <!-- Only the link carrying is-current is marked aria-current="page" and labelled "Page N"; every other link is labelled "Goto page N". --> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.18730">arXiv:2503.18730</a> <span> [<a href="https://arxiv.org/pdf/2503.18730">pdf</a>, <a href="https://arxiv.org/format/2503.18730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Predicting the Road Ahead: A Knowledge Graph based Foundation Model for Scene Understanding in Autonomous Driving </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hongkuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Schmid%2C+S">Stefan Schmid</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yicong Li</a>, <a href="/search/cs?searchtype=author&query=Halilaj%2C+L">Lavdim Halilaj</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&query=cao%2C+W">Wei cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.18730v1-abstract-short" style="display: inline;"> The autonomous driving field has seen remarkable advancements in various topics, such as object recognition, trajectory prediction, and motion planning. However, current approaches face limitations in effectively comprehending the complex evolutions of driving scenes over time. This paper proposes FM4SU, a novel methodology for training a symbolic foundation model (FM) for scene understanding in a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18730v1-abstract-full').style.display = 'inline'; document.getElementById('2503.18730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.18730v1-abstract-full" style="display: none;"> The autonomous driving field has seen remarkable advancements in various topics, such as object recognition, trajectory prediction, and motion planning. However, current approaches face limitations in effectively comprehending the complex evolutions of driving scenes over time. This paper proposes FM4SU, a novel methodology for training a symbolic foundation model (FM) for scene understanding in autonomous driving. 
It leverages knowledge graphs (KGs) to capture sensory observation along with domain knowledge such as road topology, traffic rules, or complex interactions between traffic participants. A bird's eye view (BEV) symbolic representation is extracted from the KG for each driving scene, including the spatio-temporal information among the objects across the scenes. The BEV representation is serialized into a sequence of tokens and given to pre-trained language models (PLMs) for learning an inherent understanding of the co-occurrence among driving scene elements and generating predictions on the next scenes. We conducted a number of experiments using the nuScenes dataset and KG in various scenarios. The results demonstrate that fine-tuned models achieve significantly higher accuracy in all tasks. The fine-tuned T5 model achieved a next scene prediction accuracy of 86.7%. This paper concludes that FM4SU offers a promising foundation for developing more comprehensive models for scene understanding in autonomous driving. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18730v1-abstract-full').style.display = 'none'; document.getElementById('2503.18730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.04050">arXiv:2503.04050</a> <span> [<a href="https://arxiv.org/pdf/2503.04050">pdf</a>, <a href="https://arxiv.org/format/2503.04050">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Underlying Semantic Diffusion for Effective and Efficient In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+Z">Zhong Ji</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weilong Cao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yan Zhang</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+Y">Yanwei Pang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jungong Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuelong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.04050v1-abstract-short" style="display: inline;"> Diffusion models has emerged as a powerful framework for tasks like image controllable generation and dense prediction. However, existing models often struggle to capture underlying semantics (e.g., edges, textures, shapes) and effectively utilize in-context learning, limiting their contextual understanding and image generation quality. 
Additionally, high computational costs and slow inference spe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.04050v1-abstract-full').style.display = 'inline'; document.getElementById('2503.04050v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.04050v1-abstract-full" style="display: none;"> Diffusion models has emerged as a powerful framework for tasks like image controllable generation and dense prediction. However, existing models often struggle to capture underlying semantics (e.g., edges, textures, shapes) and effectively utilize in-context learning, limiting their contextual understanding and image generation quality. Additionally, high computational costs and slow inference speeds hinder their real-time applicability. To address these challenges, we propose Underlying Semantic Diffusion (US-Diffusion), an enhanced diffusion model that boosts underlying semantics learning, computational efficiency, and in-context learning capabilities on multi-task scenarios. We introduce Separate & Gather Adapter (SGA), which decouples input conditions for different tasks while sharing the architecture, enabling better in-context learning and generalization across diverse visual domains. We also present a Feedback-Aided Learning (FAL) framework, which leverages feedback signals to guide the model in capturing semantic details and dynamically adapting to task-specific contextual cues. Furthermore, we propose a plug-and-play Efficient Sampling Strategy (ESS) for dense sampling at time steps with high-noise levels, which aims at optimizing training and inference efficiency while maintaining strong in-context learning performance. 
Experimental results demonstrate that US-Diffusion outperforms the state-of-the-art method, achieving an average reduction of 7.47 in FID on Map2Image tasks and an average reduction of 0.026 in RMSE on Image2Map tasks, while achieving approximately 9.45 times faster inference speed. Our method also demonstrates superior training efficiency and in-context learning capabilities, excelling in new datasets and tasks, highlighting its robustness and adaptability across diverse visual domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.04050v1-abstract-full').style.display = 'none'; document.getElementById('2503.04050v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.00205">arXiv:2503.00205</a> <span> [<a href="https://arxiv.org/pdf/2503.00205">pdf</a>, <a href="https://arxiv.org/format/2503.00205">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> AnalogGenie: A Generative Engine for Automatic Discovery of Analog Circuit Topologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jian Gao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidong Cao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Junyi Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuan Zhang</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.00205v1-abstract-short" style="display: inline;"> The massive and large-scale design of foundational semiconductor integrated circuits (ICs) is crucial to sustaining the advancement of many emerging and future technologies, such as generative AI, 5G/6G, and quantum computing. Excitingly, recent studies have shown the great capabilities of foundational models in expediting the design of digital ICs. Yet, applying generative AI techniques to accele… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00205v1-abstract-full').style.display = 'inline'; document.getElementById('2503.00205v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.00205v1-abstract-full" style="display: none;"> The massive and large-scale design of foundational semiconductor integrated circuits (ICs) is crucial to sustaining the advancement of many emerging and future technologies, such as generative AI, 5G/6G, and quantum computing. Excitingly, recent studies have shown the great capabilities of foundational models in expediting the design of digital ICs. Yet, applying generative AI techniques to accelerate the design of analog ICs remains a significant challenge due to critical domain-specific issues, such as the lack of a comprehensive dataset and effective representation methods for analog circuits. This paper proposes, $\textbf{AnalogGenie}$, a $\underline{\textbf{Gen}}$erat$\underline{\textbf{i}}$ve $\underline{\textbf{e}}$ngine for automatic design/discovery of $\underline{\textbf{Analog}}$ circuit topologies--the most challenging and creative task in the conventional manual design flow of analog ICs. 
AnalogGenie addresses two key gaps in the field: building a foundational comprehensive dataset of analog circuit topology and developing a scalable sequence-based graph representation universal to analog circuits. Experimental results show the remarkable generation performance of AnalogGenie in broadening the variety of analog ICs, increasing the number of devices within a single design, and discovering unseen circuit topologies far beyond any prior arts. Our work paves the way to transform the longstanding time-consuming manual design flow of analog ICs to an automatic and massive manner powered by generative AI. Our source code is available at https://github.com/xz-group/AnalogGenie. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00205v1-abstract-full').style.display = 'none'; document.getElementById('2503.00205v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025 camera ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19316">arXiv:2502.19316</a> <span> [<a href="https://arxiv.org/pdf/2502.19316">pdf</a>, <a href="https://arxiv.org/format/2502.19316">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/CVPR42600.2020.00966">10.1109/CVPR42600.2020.00966 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Model Adaptation: Unsupervised Domain Adaptation without Source Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Jiao%2C+Q">Qianfen Jiao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+H">Hau-San Wong</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Si Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19316v1-abstract-short" style="display: inline;"> In this paper, we investigate a challenging unsupervised domain adaptation setting -- unsupervised model adaptation. 
We aim to explore how to rely only on unlabeled target data to improve performance of an existing source prediction model on the target domain, since labeled source data may not be available in some real-world scenarios due to data privacy issues. For this purpose, we propose a new… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19316v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19316v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19316v1-abstract-full" style="display: none;"> In this paper, we investigate a challenging unsupervised domain adaptation setting -- unsupervised model adaptation. We aim to explore how to rely only on unlabeled target data to improve performance of an existing source prediction model on the target domain, since labeled source data may not be available in some real-world scenarios due to data privacy issues. For this purpose, we propose a new framework, which is referred to as collaborative class conditional generative adversarial net to bypass the dependence on the source data. Specifically, the prediction model is to be improved through generated target-style data, which provides more accurate guidance for the generator. As a result, the generator and the prediction model can collaborate with each other without source data. Furthermore, due to the lack of supervision from source data, we propose a weight constraint that encourages similarity to the source model. A clustering-based regularization is also introduced to produce more discriminative features in the target domain. Compared to conventional domain adaptation methods, our model achieves superior performance on multiple adaptation tasks with only unlabeled target data, which verifies its effectiveness in this challenging setting. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19316v1-abstract-full').style.display = 'none'; document.getElementById('2502.19316v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by CVPR2020</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> https://openaccess.thecvf.com/content_CVPR_2020/html/Li_Model_Adaptation_Unsupervised_Domain_Adaptation_Without_Source_Data_CVPR_2020_paper.html </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11121">arXiv:2502.11121</a> <span> [<a href="https://arxiv.org/pdf/2502.11121">pdf</a>, <a href="https://arxiv.org/format/2502.11121">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Reversible Data Hiding over Encrypted Images via Intrinsic Correlation in Block-Based Secret Sharing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+J">Jianhui Zou</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijia Cao</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+S">Shuang Yi</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yifeng Zheng</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Z">Zhongyun Hua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11121v1-abstract-short" style="display: inline;"> With the rapid advancements in information technology, reversible data hiding over encrypted images (RDH-EI) has become essential for secure image management in cloud services. However, existing RDH-EI schemes often suffer from high computational complexity, low embedding rates, and excessive data expansion. This paper addresses these challenges by first analyzing the block-based secret sharing in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11121v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11121v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11121v1-abstract-full" style="display: none;"> With the rapid advancements in information technology, reversible data hiding over encrypted images (RDH-EI) has become essential for secure image management in cloud services. However, existing RDH-EI schemes often suffer from high computational complexity, low embedding rates, and excessive data expansion. This paper addresses these challenges by first analyzing the block-based secret sharing in existing schemes, revealing significant data redundancy within image blocks. Based on this observation, we propose two space-preserving methods: the direct space-vacating method and the image-shrinking-based space-vacating method. Using these techniques, we design two novel RDH-EI schemes: a high-capacity RDH-EI scheme and a size-reduced RDH-EI scheme. The high-capacity RDH-EI scheme directly creates embedding space in encrypted images, eliminating the need for complex space-vacating operations and achieving higher and more stable embedding rates. 
In contrast, the size-reduced RDH-EI scheme minimizes data expansion by discarding unnecessary shares, resulting in smaller encrypted images. Experimental results show that the high-capacity RDH-EI scheme outperforms existing methods in terms of embedding capacity, while the size-reduced RDH-EI scheme excels in minimizing data expansion. Both schemes provide effective solutions to the challenges in RDH-EI, offering promising applications in fields such as medical imaging and cloud storage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11121v1-abstract-full').style.display = 'none'; document.getElementById('2502.11121v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07212">arXiv:2502.07212</a> <span> [<a href="https://arxiv.org/pdf/2502.07212">pdf</a>, <a href="https://arxiv.org/format/2502.07212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid-Domain Floating-Point Compute-in-Memory Architecture for Efficient Acceleration of High-Precision Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+Z">Zhiqiang Yi</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yiwen Liang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidong Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2502.07212v1-abstract-short" style="display: inline;"> Compute-in-memory (CIM) has shown significant potential in efficiently accelerating deep neural networks (DNNs) at the edge, particularly in speeding up quantized models for inference applications. Recently, there has been growing interest in developing floating-point-based CIM macros to improve the accuracy of high-precision DNN models, including both inference and training tasks. Yet, current im… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07212v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07212v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07212v1-abstract-full" style="display: none;"> Compute-in-memory (CIM) has shown significant potential in efficiently accelerating deep neural networks (DNNs) at the edge, particularly in speeding up quantized models for inference applications. Recently, there has been growing interest in developing floating-point-based CIM macros to improve the accuracy of high-precision DNN models, including both inference and training tasks. Yet, current implementations rely primarily on digital methods, leading to substantial power consumption. This paper introduces a hybrid domain CIM architecture that integrates analog and digital CIM within the same memory cell to efficiently accelerate high-precision DNNs. Specifically, we develop area-efficient circuits and energy-efficient analog-to-digital conversion techniques to realize this architecture. Comprehensive circuit-level simulations reveal the notable energy efficiency and lossless accuracy of the proposed design on benchmarks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07212v1-abstract-full').style.display = 'none'; document.getElementById('2502.07212v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 5 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06781">arXiv:2502.06781</a> <span> [<a href="https://arxiv.org/pdf/2502.06781">pdf</a>, <a href="https://arxiv.org/format/2502.06781">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Limit of Outcome Reward for Learning Mathematical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+C">Chengqi Lyu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Songyang Gao</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yuzhe Gu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianfei Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kuikun Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziyi Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuaibin Li</a>, <a 
href="/search/cs?searchtype=author&query=Zhao%2C+Q">Qian Zhao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haian Huang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weihan Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiangning Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongwei Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junnan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Songyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+D">Dahua Lin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06781v1-abstract-short" style="display: inline;"> Reasoning abilities, especially those for solving complex math problems, are crucial components of general intelligence. Recent advances by proprietary companies, such as o-series models of OpenAI, have made remarkable progress on reasoning tasks. However, the complete technical details remain unrevealed, and the techniques that are believed certainly to be adopted are only reinforcement learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06781v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06781v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06781v1-abstract-full" style="display: none;"> Reasoning abilities, especially those for solving complex math problems, are crucial components of general intelligence. Recent advances by proprietary companies, such as o-series models of OpenAI, have made remarkable progress on reasoning tasks. 
However, the complete technical details remain unrevealed, and the techniques that are believed certainly to be adopted are only reinforcement learning (RL) and the long chain of thoughts. This paper proposes a new RL framework, termed OREAL, to pursue the performance limit that can be achieved through <b>O</b>utcome <b>RE</b>w<b>A</b>rd-based reinforcement <b>L</b>earning for mathematical reasoning tasks, where only binary outcome rewards are easily accessible. We theoretically prove that behavior cloning on positive trajectories from best-of-N (BoN) sampling is sufficient to learn the KL-regularized optimal policy in binary feedback environments. This formulation further implies that the rewards of negative samples should be reshaped to ensure the gradient consistency between positive and negative samples. To alleviate the long-existing difficulties brought by sparse rewards in RL, which are even exacerbated by the partial correctness of the long chain of thought for reasoning tasks, we further apply a token-level reward model to sample important tokens in reasoning trajectories for learning. With OREAL, for the first time, a 7B model can obtain 94.0 pass@1 accuracy on MATH-500 through RL, being on par with 32B models. OREAL-32B also surpasses previous 32B models trained by distillation with 95.0 pass@1 accuracy on MATH-500. Our investigation also indicates the importance of initial policy models and training queries for RL. Code, models, and data will be released to benefit future research (https://github.com/InternLM/OREAL). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06781v1-abstract-full').style.display = 'none'; document.getElementById('2502.06781v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We released our code, data, and model on https://github.com/InternLM/OREAL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05783">arXiv:2502.05783</a> <span> [<a href="https://arxiv.org/pdf/2502.05783">pdf</a>, <a href="https://arxiv.org/format/2502.05783">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> WatchGuardian: Enabling User-Defined Personalized Just-in-Time Intervention on Smartwatch </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Y">Ying Lei</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yancheng Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Will Wang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yuanzhe Dong</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidan Cao</a>, <a 
href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jingzhen Yang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yifan Peng</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+C">Chunhua Weng</a>, <a href="/search/cs?searchtype=author&query=Auerbach%2C+R">Randy Auerbach</a>, <a href="/search/cs?searchtype=author&query=Mamykina%2C+L">Lena Mamykina</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dakuo Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xuhai Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05783v1-abstract-short" style="display: inline;"> While just-in-time interventions (JITIs) have effectively targeted common health behaviors, individuals often have unique needs to intervene in personal undesirable actions that can negatively affect physical, mental, and social well-being. We present WatchGuardian, a smartwatch-based JITI system that empowers users to define custom interventions for these personal actions with a small number of s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05783v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05783v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05783v1-abstract-full" style="display: none;"> While just-in-time interventions (JITIs) have effectively targeted common health behaviors, individuals often have unique needs to intervene in personal undesirable actions that can negatively affect physical, mental, and social well-being. 
We present WatchGuardian, a smartwatch-based JITI system that empowers users to define custom interventions for these personal actions with a small number of samples. For the model to detect new actions based on limited new data samples, we developed a few-shot learning pipeline that finetuned a pre-trained inertial measurement unit (IMU) model on public hand-gesture datasets. We then designed a data augmentation and synthesis process to train additional classification layers for customization. Our offline evaluation with 26 participants showed that with three, five, and ten examples, our approach achieved an average accuracy of 76.8%, 84.7%, and 87.7%, and an F1 score of 74.8%, 84.2%, and 87.2%. We then conducted a four-hour intervention study to compare WatchGuardian against a rule-based intervention. Our results demonstrated that our system led to a significant reduction by 64.0 ± 22.6% in undesirable actions, substantially outperforming the baseline by 29.0%. Our findings underscore the effectiveness of a customizable, AI-driven JITI system for individuals in need of behavioral intervention in personal undesirable actions. We envision that our work can inspire broader applications of user-defined personalized intervention with advanced AI solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05783v1-abstract-full').style.display = 'none'; document.getElementById('2502.05783v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under submission</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68U35 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.5.2; I.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03732">arXiv:2502.03732</a> <span> [<a href="https://arxiv.org/pdf/2502.03732">pdf</a>, <a href="https://arxiv.org/format/2502.03732">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> More Modality, More AI: Exploring Design Opportunities of AI-Based Multi-modal Remote Monitoring Technologies for Early Detection of Mental Health Sequelae in Youth Concussion Patients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+M">Menglin Zhao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuling Sun</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&query=Intille%2C+S">Stephen Intille</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xuhai Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jingzhen Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dakuo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2502.03732v1-abstract-short" style="display: inline;"> Anxiety, depression, and suicidality are common mental health sequelae following concussion in youth patients, often exacerbating concussion symptoms and prolonging recovery. Despite the critical need for early detection of these mental health symptoms, clinicians often face challenges in accurately collecting patients' mental health data and making clinical decisions in a timely manner. Tod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03732v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03732v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03732v1-abstract-full" style="display: none;"> Anxiety, depression, and suicidality are common mental health sequelae following concussion in youth patients, often exacerbating concussion symptoms and prolonging recovery. Despite the critical need for early detection of these mental health symptoms, clinicians often face challenges in accurately collecting patients' mental health data and making clinical decisions in a timely manner. Today's remote patient monitoring (RPM) technologies offer opportunities to objectively monitor patients' activities, but they were not specifically designed for youth concussion patients; moreover, the large amount of data collected by RPM technologies may also impose significant workloads on clinicians to keep up with and use the data. To address these gaps, we employed a three-stage study consisting of a formative study, interface design, and design evaluation. We first conducted a formative study through semi-structured interviews with six highly professional concussion clinicians and identified clinicians' key challenges in remotely collecting patient information and accessing patient treatment compliance. 
Subsequently, we proposed preliminary clinician-facing interface designs with the integration of AI-based RPM technologies (AI-RPM), followed by design evaluation sessions with highly professional concussion clinicians. Clinicians underscored the value of integrating multi-modal AI-RPM technologies to support clinicians' decision-making while emphasizing the importance of customizable interfaces with explainability and multiple responsible design considerations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03732v1-abstract-full').style.display = 'none'; document.getElementById('2502.03732v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01986">arXiv:2502.01986</a> <span> [<a href="https://arxiv.org/pdf/2502.01986">pdf</a>, <a href="https://arxiv.org/format/2502.01986">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> DCT-Mamba3D: Spectral Decorrelation and Spatial-Spectral Feature Extraction for Hyperspectral Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijia Cao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofei Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yicong Zhou</a>, <a 
href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01986v1-abstract-short" style="display: inline;"> Hyperspectral image classification presents challenges due to spectral redundancy and complex spatial-spectral dependencies. This paper proposes a novel framework, DCT-Mamba3D, for hyperspectral image classification. DCT-Mamba3D incorporates: (1) a 3D spectral-spatial decorrelation module that applies 3D discrete cosine transform basis functions to reduce both spectral and spatial redundancy, enha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01986v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01986v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01986v1-abstract-full" style="display: none;"> Hyperspectral image classification presents challenges due to spectral redundancy and complex spatial-spectral dependencies. This paper proposes a novel framework, DCT-Mamba3D, for hyperspectral image classification. DCT-Mamba3D incorporates: (1) a 3D spectral-spatial decorrelation module that applies 3D discrete cosine transform basis functions to reduce both spectral and spatial redundancy, enhancing feature clarity across dimensions; (2) a 3D-Mamba module that leverages a bidirectional state-space model to capture intricate spatial-spectral dependencies; and (3) a global residual enhancement module that stabilizes feature representation, improving robustness and convergence. Extensive experiments on benchmark datasets show that our DCT-Mamba3D outperforms the state-of-the-art methods in challenging scenarios such as the same object in different spectra and different objects in the same spectra. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01986v1-abstract-full').style.display = 'none'; document.getElementById('2502.01986v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14548">arXiv:2501.14548</a> <span> [<a href="https://arxiv.org/pdf/2501.14548">pdf</a>, <a href="https://arxiv.org/format/2501.14548">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Large-scale and Fine-grained Vision-language Pre-training for Enhanced CT Image Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shui%2C+Z">Zhongyi Shui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianpeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weiwei Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sinuo Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+R">Ruizhe Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+L">Lin Yang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xianghua Ye</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+T">Tingbo Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Ling Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14548v1-abstract-short" style="display: inline;"> Artificial intelligence (AI) shows great potential in assisting radiologists to improve the efficiency and accuracy of medical image interpretation and diagnosis. However, a versatile AI model requires large-scale data and comprehensive annotations, which are often impractical in medical settings. Recent studies leverage radiology reports as a naturally high-quality supervision for medical images,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14548v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14548v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14548v1-abstract-full" style="display: none;"> Artificial intelligence (AI) shows great potential in assisting radiologists to improve the efficiency and accuracy of medical image interpretation and diagnosis. However, a versatile AI model requires large-scale data and comprehensive annotations, which are often impractical in medical settings. Recent studies leverage radiology reports as a naturally high-quality supervision for medical images, using contrastive language-image pre-training (CLIP) to develop language-informed models for radiological image interpretation. Nonetheless, these approaches typically contrast entire images with reports, neglecting the local associations between imaging regions and report sentences, which may undermine model performance and interoperability. In this paper, we propose a fine-grained vision-language model (fVLM) for anatomy-level CT image interpretation. Specifically, we explicitly match anatomical regions of CT images with corresponding descriptions in radiology reports and perform contrastive pre-training for each anatomy individually. 
Fine-grained alignment, however, faces considerable false-negative challenges, mainly from the abundance of anatomy-level healthy samples and similarly diseased abnormalities. To tackle this issue, we propose identifying false negatives of both normal and abnormal samples and calibrating contrastive learning from patient-level to disease-aware pairing. We curated the largest CT dataset to date, comprising imaging and report data from 69,086 patients, and conducted a comprehensive evaluation of 54 major and important disease diagnosis tasks across 15 main anatomies. Experimental results demonstrate the substantial potential of fVLM in versatile medical image interpretation. In the zero-shot classification task, we achieved an average AUC of 81.3% on 54 diagnosis tasks, surpassing CLIP and supervised methods by 12.9% and 8.0%, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14548v1-abstract-full').style.display = 'none'; document.getElementById('2501.14548v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09616">arXiv:2501.09616</a> <span> [<a href="https://arxiv.org/pdf/2501.09616">pdf</a>, <a href="https://arxiv.org/format/2501.09616">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ARMAX identification of low rank graphical models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenqi Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Aming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09616v1-abstract-short" style="display: inline;"> In large-scale systems, complex internal relationships are often present. Such interconnected systems can be effectively described by low rank stochastic processes. When identifying a predictive model of low rank processes from sampling data, the rank-deficient property of spectral densities is often obscured by the inevitable measurement noise in practice. However, existing low rank identificatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09616v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09616v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09616v1-abstract-full" style="display: none;"> In large-scale systems, complex internal relationships are often present. 
Such interconnected systems can be effectively described by low rank stochastic processes. When identifying a predictive model of low rank processes from sampling data, the rank-deficient property of spectral densities is often obscured by the inevitable measurement noise in practice. However, existing low rank identification approaches often did not take noise into explicit consideration, leading to non-negligible inaccuracies even under weak noise. In this paper, we address the identification issue of low rank processes under measurement noise. We find that the noisy measurement model admits a sparse plus low rank structure in latent-variable graphical models. Specifically, we first decompose the problem into a maximum entropy covariance extension problem, and a low rank graphical estimation problem based on an autoregressive moving-average with exogenous input (ARMAX) model. To identify the ARMAX low rank graphical models, we propose an estimation approach based on maximum likelihood. The identifiability and consistency of this approach are proven under certain conditions. Simulation results confirm the reliable performance of the entire algorithm in both the parameter estimation and noisy data filtering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09616v1-abstract-full').style.display = 'none'; document.getElementById('2501.09616v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00190">arXiv:2501.00190</a> <span> [<a href="https://arxiv.org/pdf/2501.00190">pdf</a>, <a href="https://arxiv.org/format/2501.00190">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> SepsisCalc: Integrating Clinical Calculators into Early Sepsis Prediction via Dynamic Temporal Graph Construction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+S">Shihan Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T">Thai-Hoang Pham</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dakuo Wang</a>, <a href="/search/cs?searchtype=author&query=Caterino%2C+J">Jeffrey Caterino</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00190v2-abstract-short" style="display: inline;"> Sepsis is an organ dysfunction caused by a deregulated immune response to an infection. Early sepsis prediction and identification allow for timely intervention, leading to improved clinical outcomes. 
Clinical calculators (e.g., the six-organ dysfunction assessment of SOFA) play a vital role in sepsis identification within clinicians' workflow, providing evidence-based risk assessments essential f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00190v2-abstract-full').style.display = 'inline'; document.getElementById('2501.00190v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00190v2-abstract-full" style="display: none;"> Sepsis is an organ dysfunction caused by a deregulated immune response to an infection. Early sepsis prediction and identification allow for timely intervention, leading to improved clinical outcomes. Clinical calculators (e.g., the six-organ dysfunction assessment of SOFA) play a vital role in sepsis identification within clinicians' workflow, providing evidence-based risk assessments essential for sepsis diagnosis. However, artificial intelligence (AI) sepsis prediction models typically generate a single sepsis risk score without incorporating clinical calculators for assessing organ dysfunctions, making the models less convincing and transparent to clinicians. To bridge the gap, we propose to mimic clinicians' workflow with a novel framework SepsisCalc to integrate clinical calculators into the predictive model, yielding a clinically transparent and precise model for utilization in clinical settings. Practically, clinical calculators usually combine information from multiple component variables in Electronic Health Records (EHR), and might not be applicable when the variables are (partially) missing. We mitigate this issue by representing EHRs as temporal graphs and integrating a learning module to dynamically add the accurately estimated calculator to the graphs. Experimental results on real-world datasets show that the proposed model outperforms state-of-the-art methods on sepsis prediction tasks. 
Moreover, we developed a system to identify organ dysfunctions and potential sepsis risks, providing a human-AI interaction tool for deployment, which can help clinicians understand the prediction outputs and prepare timely interventions for the corresponding dysfunctions, paving the way for actionable clinical decision-making support for early intervention. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00190v2-abstract-full').style.display = 'none'; document.getElementById('2501.00190v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06353">arXiv:2412.06353</a> <span> [<a href="https://arxiv.org/pdf/2412.06353">pdf</a>, <a href="https://arxiv.org/format/2412.06353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> 3D Extended Target Sensing in ISAC: Cramér-Rao Bound Analysis and Beamforming Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiqiu Wang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+M">Meixia Tao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+S">Shu Sun</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wei Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06353v2-abstract-short" style="display: inline;"> This paper investigates an integrated sensing and communication (ISAC) system where the sensing target is a three-dimensional (3D) extended target, for which multiple scatterers from the target surface can be resolved. We first introduce a second-order truncated Fourier series surface model for an arbitrarily-shaped 3D ET. Utilizing this model, we derive tractable Cramer-Rao bounds (CRBs) for esti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06353v2-abstract-full').style.display = 'inline'; document.getElementById('2412.06353v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06353v2-abstract-full" style="display: none;"> This paper investigates an integrated sensing and communication (ISAC) system where the sensing target is a three-dimensional (3D) extended target, for which multiple scatterers from the target surface can be resolved. We first introduce a second-order truncated Fourier series surface model for an arbitrarily-shaped 3D ET. Utilizing this model, we derive tractable Cramer-Rao bounds (CRBs) for estimating the ET kinematic parameters, including the center range, azimuth, elevation, and orientation. These CRBs depend explicitly on the transmit covariance matrix and ET shape. Then we formulate two transmit beamforming optimization problems for the base station (BS) to simultaneously support communication with multiple users and sensing of the 3D ET. The first minimizes the sensing CRB while ensuring a minimum signal-to-interference-plus-noise ratio (SINR) for each user, and it is solved using semidefinite relaxation. The second balances minimizing the CRB and maximizing communication rates through a weight factor, and is solved via successive convex approximation. 
To reduce the computational complexity, we further propose ISACBeam-GNN, a novel graph neural network-based beamforming method that employs a separate-then-integrate structure, learning communication and sensing (C&S) objectives independently before integrating them to balance C&S trade-offs. Simulation results show that the proposed beamforming designs that account for ET shapes significantly outperform existing baselines, offering better communication-sensing performance trade-offs as well as an improved beampattern for sensing. Results also demonstrate that ISACBeam-GNN is an efficient alternative to the optimization-based methods, with remarkable adaptability and scalability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06353v2-abstract-full').style.display = 'none'; document.getElementById('2412.06353v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures, partially published in IEEE Global Communications Conference 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18831">arXiv:2411.18831</a> <span> [<a href="https://arxiv.org/pdf/2411.18831">pdf</a>, <a href="https://arxiv.org/format/2411.18831">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Measuring Risk of Bias in Biomedical Reports: The RoBBR Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianyou Wang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weili Cao</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+L">Longtian Bao</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Youze Zheng</a>, <a href="/search/cs?searchtype=author&query=Pasternak%2C+G">Gil Pasternak</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kaicheng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoyue Wang</a>, <a href="/search/cs?searchtype=author&query=Paturi%2C+R">Ramamohan Paturi</a>, <a href="/search/cs?searchtype=author&query=Bergen%2C+L">Leon Bergen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18831v1-abstract-short" style="display: inline;"> Systems that answer questions by reviewing the scientific literature are becoming increasingly feasible. 
To draw reliable conclusions, these systems should take into account the quality of available evidence, placing more weight on studies that use a valid methodology. We present a benchmark for measuring the methodological strength of biomedical papers, drawing on the risk-of-bias framework used… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18831v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18831v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18831v1-abstract-full" style="display: none;"> Systems that answer questions by reviewing the scientific literature are becoming increasingly feasible. To draw reliable conclusions, these systems should take into account the quality of available evidence, placing more weight on studies that use a valid methodology. We present a benchmark for measuring the methodological strength of biomedical papers, drawing on the risk-of-bias framework used for systematic reviews. The four benchmark tasks, drawn from more than 500 papers, cover the analysis of research study methodology, followed by evaluation of risk of bias in these studies. The benchmark contains 2000 expert-generated bias annotations, and a human-validated pipeline for fine-grained alignment with research paper content. We evaluate a range of large language models on the benchmark, and find that these models fall significantly short of expert-level performance. By providing a standardized tool for measuring judgments of study quality, the benchmark can help to guide systems that perform large-scale aggregation of scientific data. The dataset is available at https://github.com/RoBBR-Benchmark/RoBBR. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18831v1-abstract-full').style.display = 'none'; document.getElementById('2411.18831v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11483">arXiv:2411.11483</a> <span> [<a href="https://arxiv.org/pdf/2411.11483">pdf</a>, <a href="https://arxiv.org/format/2411.11483">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Robust State Estimation for Legged Robots with Dual Beta Kalman Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenhan Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tao Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiangtao Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+E">Shengbo Eben Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11483v1-abstract-short" style="display: inline;"> Existing state estimation algorithms for legged robots that rely on proprioceptive sensors often overlook foot slippage and leg deformation in the physical world, leading to large estimation errors. 
To address this limitation, we propose a comprehensive measurement model that accounts for both foot slippage and variable leg length by analyzing the relative motion between foot contact points and th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11483v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11483v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11483v1-abstract-full" style="display: none;"> Existing state estimation algorithms for legged robots that rely on proprioceptive sensors often overlook foot slippage and leg deformation in the physical world, leading to large estimation errors. To address this limitation, we propose a comprehensive measurement model that accounts for both foot slippage and variable leg length by analyzing the relative motion between foot contact points and the robot's body center. We show that leg length is an observable quantity, meaning that its value can be explicitly inferred by designing an auxiliary filter. To this end, we introduce a dual estimation framework that iteratively employs a parameter filter to estimate the leg length parameters and a state filter to estimate the robot's state. To prevent error accumulation in this iterative framework, we construct a partial measurement model for the parameter filter using the leg static equation. This approach ensures that leg length estimation relies solely on joint torques and foot contact forces, avoiding the influence of state estimation errors on the parameter estimation. Unlike leg length which can be directly estimated, foot slippage cannot be measured directly with the current sensor configuration. However, since foot slippage occurs at a low frequency, it can be treated as outliers in the measurement data. 
To mitigate the impact of these outliers, we propose the beta Kalman filter (beta KF), which redefines the estimation loss in canonical Kalman filtering using beta divergence. This divergence can assign low weights to outliers in an adaptive manner, thereby enhancing the robustness of the estimation algorithm. These techniques together form the dual beta-Kalman filter (Dual beta KF), a novel algorithm for robust state estimation in legged robots. Experimental results on the Unitree GO2 robot demonstrate that the Dual beta KF significantly outperforms state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11483v1-abstract-full').style.display = 'none'; document.getElementById('2411.11483v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09844">arXiv:2410.09844</a> <span> [<a href="https://arxiv.org/pdf/2410.09844">pdf</a>, <a href="https://arxiv.org/format/2410.09844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s00371-024-03610-0">10.1007/s00371-024-03610-0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> HASN: Hybrid Attention Separable Network for Efficient Image Super-resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weifeng Cao</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+X">Xiaoyan Lei</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jun Shi</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+W">Wanyong Liang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Z">Zongfei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09844v1-abstract-short" style="display: inline;"> Recently, lightweight methods for single image super-resolution (SISR) have gained significant popularity and achieved impressive performance due to limited hardware resources. 
These methods demonstrate that adopting residual feature distillation is an effective way to enhance performance. However, we find that using residual connections after each block increases the model's storage and computati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09844v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09844v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09844v1-abstract-full" style="display: none;"> Recently, lightweight methods for single image super-resolution (SISR) have gained significant popularity and achieved impressive performance due to limited hardware resources. These methods demonstrate that adopting residual feature distillation is an effective way to enhance performance. However, we find that using residual connections after each block increases the model's storage and computational cost. Therefore, to simplify the network structure and learn higher-level features and relationships between features, we use depthwise separable convolutions, fully connected layers, and activation functions as the basic feature extraction modules. This significantly reduces computational load and the number of parameters while maintaining strong feature extraction capabilities. To further enhance model performance, we propose the Hybrid Attention Separable Block (HASB), which combines channel attention and spatial attention, thus making use of their complementary advantages. Additionally, we use depthwise separable convolutions instead of standard convolutions, significantly reducing the computational load and the number of parameters while maintaining strong feature extraction capabilities. During the training phase, we also adopt a warm-start retraining strategy to exploit the potential of the model further. Extensive experiments demonstrate the effectiveness of our approach. 
Our method achieves a smaller model size and reduced computational complexity without compromising performance. Code can be available at https://github.com/nathan66666/HASN.git <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09844v1-abstract-full').style.display = 'none'; document.getElementById('2410.09844v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Visual Computer</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04592">arXiv:2410.04592</a> <span> [<a href="https://arxiv.org/pdf/2410.04592">pdf</a>, <a href="https://arxiv.org/format/2410.04592">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> CardioAI: A Multimodal AI-based System to Support Symptom Monitoring and Risk Detection of Cancer Treatment-Induced Cardiotoxicity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Siyi Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+S">Shihan Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziqi Yang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+V">Varun 
Mishra</a>, <a href="/search/cs?searchtype=author&query=Addison%2C+D">Daniel Addison</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dakuo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04592v3-abstract-short" style="display: inline;"> Despite recent advances in cancer treatments that prolong patients' lives, treatment-induced cardiotoxicity remains one severe side effect. The clinical decision-making of cardiotoxicity is challenging, as non-clinical symptoms can be missed until life-threatening events occur at a later stage, and clinicians already have a high workload centered on the treatment, not the side effects. Our project… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04592v3-abstract-full').style.display = 'inline'; document.getElementById('2410.04592v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04592v3-abstract-full" style="display: none;"> Despite recent advances in cancer treatments that prolong patients' lives, treatment-induced cardiotoxicity remains one severe side effect. The clinical decision-making of cardiotoxicity is challenging, as non-clinical symptoms can be missed until life-threatening events occur at a later stage, and clinicians already have a high workload centered on the treatment, not the side effects. Our project starts with a participatory design study with 11 clinicians to understand their practices and needs; then we build a multimodal AI system, CardioAI, that integrates wearables and LLM-powered voice assistants to monitor multimodal non-clinical symptoms. 
Also, the system includes an explainable risk prediction module that can generate cardiotoxicity risk scores and summaries as explanations to support clinicians' decision-making. We conducted a heuristic evaluation with four clinical experts and found that they all believe CardioAI integrates well into their workflow, reduces their information overload, and enables them to make more informed decisions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04592v3-abstract-full').style.display = 'none'; document.getElementById('2410.04592v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02917">arXiv:2410.02917</a> <span> [<a href="https://arxiv.org/pdf/2410.02917">pdf</a>, <a href="https://arxiv.org/format/2410.02917">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5220/0013201000003912">10.5220/0013201000003912 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep image-based Adaptive BRDF Measure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Cao%2C+W">Wen Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02917v1-abstract-short" style="display: inline;"> Efficient and accurate measurement of the bi-directional reflectance distribution function (BRDF) plays a key role in high quality image rendering and physically accurate sensor simulation. However, obtaining the reflectance properties of a material is both time-consuming and challenging. This paper presents a novel method for minimizing the number of samples required for high quality BRDF capture… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02917v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02917v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02917v1-abstract-full" style="display: none;"> Efficient and accurate measurement of the bi-directional reflectance distribution function (BRDF) plays a key role in high quality image rendering and physically accurate sensor simulation. However, obtaining the reflectance properties of a material is both time-consuming and challenging. This paper presents a novel method for minimizing the number of samples required for high quality BRDF capture using a gonio-reflectometer setup. Taking an image of the physical material sample as input a lightweight neural network first estimates the parameters of an analytic BRDF model, and the distribution of the sample locations. In a second step we use an image based loss to find the number of samples required to meet the accuracy required. This approach significantly accelerates the measurement process while maintaining a high level of accuracy and fidelity in the BRDF representation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02917v1-abstract-full').style.display = 'none'; document.getElementById('2410.02917v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2025,In Proceedings of the 20th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications - Volume 1: GRAPP, HUCAPP and IVAPP </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17951">arXiv:2409.17951</a> <span> [<a href="https://arxiv.org/pdf/2409.17951">pdf</a>, <a href="https://arxiv.org/format/2409.17951">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Spatial Hierarchy and Temporal Attention Guided Cross Masking for Self-supervised Skeleton-based Action Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xinpeng Yin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17951v1-abstract-short" style="display: inline;"> In self-supervised skeleton-based action 
recognition, the mask reconstruction paradigm is gaining interest in enhancing model refinement and robustness through effective masking. However, previous works primarily relied on a single masking criterion, resulting in the model overfitting specific features and overlooking other effective information. In this paper, we introduce a hierarchy and attenti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17951v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17951v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17951v1-abstract-full" style="display: none;"> In self-supervised skeleton-based action recognition, the mask reconstruction paradigm is gaining interest in enhancing model refinement and robustness through effective masking. However, previous works primarily relied on a single masking criterion, resulting in the model overfitting specific features and overlooking other effective information. In this paper, we introduce a hierarchy and attention guided cross-masking framework (HA-CM) that applies masking to skeleton sequences from both spatial and temporal perspectives. Specifically, in spatial graphs, we utilize hyperbolic space to maintain joint distinctions and effectively preserve the hierarchical structure of high-dimensional skeletons, employing joint hierarchy as the masking criterion. In temporal flows, we substitute traditional distance metrics with the global attention of joints for masking, addressing the convergence of distances in high-dimensional space and the lack of a global perspective. Additionally, we incorporate cross-contrast loss based on the cross-masking framework into the loss function to enhance the model's learning of instance-level features. HA-CM shows efficiency and universality on three public large-scale datasets, NTU-60, NTU-120, and PKU-MMD. 
The source code of our HA-CM is available at https://github.com/YinxPeng/HA-CM-main. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17951v1-abstract-full').style.display = 'none'; document.getElementById('2409.17951v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages,6 figures,IEEE Trans</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11024">arXiv:2409.11024</a> <span> [<a href="https://arxiv.org/pdf/2409.11024">pdf</a>, <a href="https://arxiv.org/format/2409.11024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> D2Vformer: A Flexible Time Series Prediction Model Based on Time Position Embedding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiaobao Song</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+L">Liwei Deng</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yuxin He</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a>, <a href="/search/cs?searchtype=author&query=Leungc%2C+C">Chi-Sing Leungc</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11024v1-abstract-short" style="display: inline;"> Time position embeddings capture the positional information of time steps, often serving as auxiliary inputs to enhance the predictive capabilities of time series models. However, existing models exhibit limitations in capturing intricate time positional information and effectively utilizing these embeddings. To address these limitations, this paper proposes a novel model called D2Vformer. Unlike… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11024v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11024v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11024v1-abstract-full" style="display: none;"> Time position embeddings capture the positional information of time steps, often serving as auxiliary inputs to enhance the predictive capabilities of time series models. However, existing models exhibit limitations in capturing intricate time positional information and effectively utilizing these embeddings. To address these limitations, this paper proposes a novel model called D2Vformer. Unlike typical prediction methods that rely on RNNs or Transformers, this approach can directly handle scenarios where the predicted sequence is not adjacent to the input sequence or where its length dynamically changes. In comparison to conventional methods, D2Vformer undoubtedly saves a significant amount of training resources. In D2Vformer, the Date2Vec module uses the timestamp information and feature sequences to generate time position embeddings. 
Afterward, D2Vformer introduces a new fusion block that utilizes an attention mechanism to explore the similarity in time positions between the embeddings of the input sequence and the predicted sequence, thereby generating predictions based on this similarity. Through extensive experiments on six datasets, we demonstrate that Date2Vec outperforms other time position embedding methods, and D2Vformer surpasses state-of-the-art methods in both fixed-length and variable-length prediction tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11024v1-abstract-full').style.display = 'none'; document.getElementById('2409.11024v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06334">arXiv:2409.06334</a> <span> [<a href="https://arxiv.org/pdf/2409.06334">pdf</a>, <a href="https://arxiv.org/format/2409.06334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Weather Image Restoration via Histogram-Based Transformer Feature Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yang Wen</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+A">Anyu Lai</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+B">Bo Qian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wuzhen Shi</a>, <a 
href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06334v1-abstract-short" style="display: inline;"> Currently, the mainstream restoration tasks under adverse weather conditions have predominantly focused on single-weather scenarios. However, in reality, multiple weather conditions always coexist and their degree of mixing is usually unknown. Under such complex and diverse weather conditions, single-weather restoration models struggle to meet practical demands. This is particularly critical in fi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06334v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06334v1-abstract-full" style="display: none;"> Currently, the mainstream restoration tasks under adverse weather conditions have predominantly focused on single-weather scenarios. However, in reality, multiple weather conditions always coexist and their degree of mixing is usually unknown. Under such complex and diverse weather conditions, single-weather restoration models struggle to meet practical demands. This is particularly critical in fields such as autonomous driving, where there is an urgent need for a model capable of effectively handling mixed weather conditions and enhancing image quality in an automated manner. In this paper, we propose a Task Sequence Generator module that, in conjunction with the Task Intra-patch Block, effectively extracts task-specific features embedded in degraded images. The Task Intra-patch Block introduces an external learnable sequence that aids the network in capturing task-specific information. 
Additionally, we employ a histogram-based transformer module as the backbone of our network, enabling the capture of both global and local dynamic range features. Our proposed model achieves state-of-the-art performance on public datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06334v1-abstract-full').style.display = 'none'; document.getElementById('2409.06334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2409.03249</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03249">arXiv:2409.03249</a> <span> [<a href="https://arxiv.org/pdf/2409.03249">pdf</a>, <a href="https://arxiv.org/format/2409.03249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multiple weather images restoration using the task transformer and adaptive mixup strategy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yang Wen</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+A">Anyu Lai</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+B">Bo Qian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wuzhen Shi</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03249v1-abstract-short" style="display: inline;"> The current state-of-the-art in severe weather removal predominantly focuses on single-task applications, such as rain removal, haze removal, and snow removal. However, real-world weather conditions often consist of a mixture of several weather types, and the degree of weather mixing in autonomous driving scenarios remains unknown. In the presence of complex and diverse weather conditions, a singl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03249v1-abstract-full').style.display = 'inline'; document.getElementById('2409.03249v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03249v1-abstract-full" style="display: none;"> The current state-of-the-art in severe weather removal predominantly focuses on single-task applications, such as rain removal, haze removal, and snow removal. However, real-world weather conditions often consist of a mixture of several weather types, and the degree of weather mixing in autonomous driving scenarios remains unknown. In the presence of complex and diverse weather conditions, a single weather removal model often encounters challenges in producing clear images from severe weather images. Therefore, there is a need for the development of multi-task severe weather removal models that can effectively handle mixed weather conditions and improve image quality in autonomous driving scenarios. In this paper, we introduce a novel multi-task severe weather removal model that can effectively handle complex weather conditions in an adaptive manner. Our model incorporates a weather task sequence generator, enabling the self-attention mechanism to selectively focus on features specific to different weather types. 
To tackle the challenge of repairing large areas of weather degradation, we introduce Fast Fourier Convolution (FFC) to increase the receptive field. Additionally, we propose an adaptive upsampling technique that effectively processes both the weather task information and underlying image features by selectively retaining relevant information. Our proposed model has achieved state-of-the-art performance on the publicly available dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03249v1-abstract-full').style.display = 'none'; document.getElementById('2409.03249v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures and 2 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16719">arXiv:2408.16719</a> <span> [<a href="https://arxiv.org/pdf/2408.16719">pdf</a>, <a href="https://arxiv.org/format/2408.16719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> H-SGANet: Hybrid Sparse Graph Attention Network for Deformable Medical Image Registration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yufeng Zhou</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenming Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16719v1-abstract-short" style="display: inline;"> The integration of Convolutional Neural Network (ConvNet) and Transformer has emerged as a strong candidate for image registration, leveraging the strengths of both models and a large parameter space. However, this hybrid model, treating brain MRI volumes as grid or sequence structures, faces challenges in accurately representing anatomical connectivity, diverse brain regions, and vital connection… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16719v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16719v1-abstract-full" style="display: none;"> The integration of Convolutional Neural Network (ConvNet) and Transformer has emerged as a strong candidate for image registration, leveraging the strengths of both models and a large parameter space. However, this hybrid model, treating brain MRI volumes as grid or sequence structures, faces challenges in accurately representing anatomical connectivity, diverse brain regions, and vital connections contributing to the brain's internal architecture. Concerns also arise regarding the computational expense and GPU memory usage associated with this model. To tackle these issues, a lightweight hybrid sparse graph attention network (H-SGANet) has been developed. This network incorporates a central mechanism, Sparse Graph Attention (SGA), based on a Vision Graph Neural Network (ViG) with predetermined anatomical connections. The SGA module expands the model's receptive field and seamlessly integrates into the network. 
To further amplify the advantages of the hybrid network, the Separable Self-Attention (SSA) is employed as an enhanced token mixer, integrated with depth-wise convolution to constitute SSAFormer. This strategic integration is designed to more effectively extract long-range dependencies. As a hybrid ConvNet-ViG-Transformer model, H-SGANet offers threefold benefits for volumetric medical image registration. It optimizes fixed and moving images concurrently through a hybrid feature fusion layer and an end-to-end learning framework. Compared to VoxelMorph, a model with a similar parameter count, H-SGANet demonstrates significant performance enhancements of 3.5% and 1.5% in Dice score on the OASIS dataset and LPBA40 dataset, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16719v1-abstract-full').style.display = 'none'; document.getElementById('2408.16719v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14072">arXiv:2408.14072</a> <span> [<a href="https://arxiv.org/pdf/2408.14072">pdf</a>, <a href="https://arxiv.org/ps/2408.14072">ps</a>, <a href="https://arxiv.org/format/2408.14072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Hybrid SIC Aided Hybrid NOMA: A New Approach For Improving Energy Efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yanshi Sun</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wei Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+N">Ning Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Momiao Zhou</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14072v2-abstract-short" style="display: inline;"> Hybrid non-orthogonal multiple access (NOMA), which organically combines pure NOMA and conventional OMA, has recently received significant attention to be a promising multiple access framework for future wireless communication networks. 
However, most of the literatures on hybrid NOMA only consider fixed order of successive interference cancellation (SIC), namely FSIC, for the NOMA transmission pha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14072v2-abstract-full').style.display = 'inline'; document.getElementById('2408.14072v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14072v2-abstract-full" style="display: none;"> Hybrid non-orthogonal multiple access (NOMA), which organically combines pure NOMA and conventional OMA, has recently received significant attention to be a promising multiple access framework for future wireless communication networks. However, most of the literatures on hybrid NOMA only consider fixed order of successive interference cancellation (SIC), namely FSIC, for the NOMA transmission phase of hybrid NOMA, resulting in limited performance. Differently, this paper aims to reveal the potential of applying hybrid SIC (HSIC) to improve the energy efficiency of hybrid NOMA. Specifically, a HSIC aided hybrid NOMA scheme is proposed, which can be treated as a simple add-on to the legacy orthogonal multiple access (OMA) based network. The proposed scheme offers some users (termed ``opportunistic users'') to have more chances to transmit by transparently sharing legacy users' time slots. For a fair comparison, a power reducing coefficient $β$ is introduced to ensure that the energy consumption of the proposed scheme is less than conventional OMA. Given $β$, the probability for the event that the achievable rate of the proposed HSIC aided hybrid NOMA scheme cannot outperform its OMA counterpart is obtained in closed-form, by considering impact of user pairing. 
Furthermore, asymptotic analysis shows that the aforementioned probability can approach zero under some given conditions in the SNR regime, indicating that the energy efficiency of the proposed scheme is almost surely higher than that of OMA for these given conditions. Numerical results are presented to verify the analysis and also demonstrate the benefit of applying HSIC compared to FSIC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14072v2-abstract-full').style.display = 'none'; document.getElementById('2408.14072v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04229">arXiv:2408.04229</a> <span> [<a href="https://arxiv.org/pdf/2408.04229">pdf</a>, <a href="https://arxiv.org/ps/2408.04229">ps</a>, <a href="https://arxiv.org/format/2408.04229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Circuits for Cumulative Distribution Functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Broadrick%2C+O">Oliver Broadrick</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">William Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Benjie Wang</a>, <a href="/search/cs?searchtype=author&query=Trapp%2C+M">Martin 
Trapp</a>, <a href="/search/cs?searchtype=author&query=Broeck%2C+G+V+d">Guy Van den Broeck</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04229v1-abstract-short" style="display: inline;"> A probabilistic circuit (PC) succinctly expresses a function that represents a multivariate probability distribution and, given sufficient structural properties of the circuit, supports efficient probabilistic inference. Typically a PC computes the probability mass (or density) function (PMF or PDF) of the distribution. We consider PCs instead computing the cumulative distribution function (CDF).… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04229v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04229v1-abstract-full" style="display: none;"> A probabilistic circuit (PC) succinctly expresses a function that represents a multivariate probability distribution and, given sufficient structural properties of the circuit, supports efficient probabilistic inference. Typically a PC computes the probability mass (or density) function (PMF or PDF) of the distribution. We consider PCs instead computing the cumulative distribution function (CDF). We show that for distributions over binary random variables these representations (PMF and CDF) are essentially equivalent, in the sense that one can be transformed to the other in polynomial time. We then show how a similar equivalence holds for distributions over finite discrete variables using a modification of the standard encoding with binary variables that aligns with the CDF semantics. 
Finally we show that for continuous variables, smooth, decomposable PCs computing PDFs and CDFs can be efficiently transformed to each other by modifying only the leaves of the circuit. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04229v1-abstract-full').style.display = 'none'; document.getElementById('2408.04229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In Proceedings of the UAI Workshop on Tractable Probabilistic Modeling (TPM), 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03586">arXiv:2408.03586</a> <span> [<a href="https://arxiv.org/pdf/2408.03586">pdf</a>, <a href="https://arxiv.org/format/2408.03586">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Clinical Challenges and AI Opportunities in Decision-Making for Cancer Treatment-Induced Cardiotoxicity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Siyi Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+S">Shihan Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziqi Yang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+V">Varun Mishra</a>, <a 
href="/search/cs?searchtype=author&query=Addison%2C+D">Daniel Addison</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dakuo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03586v1-abstract-short" style="display: inline;"> Cardiotoxicity induced by cancer treatment has become a major clinical concern, affecting the long-term survival and quality of life of cancer patients. Effective clinical decision-making, including the detection of cancer treatment-induced cardiotoxicity and the monitoring of associated symptoms, remains a challenging task for clinicians. This study investigates the current practices and needs of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03586v1-abstract-full').style.display = 'inline'; document.getElementById('2408.03586v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03586v1-abstract-full" style="display: none;"> Cardiotoxicity induced by cancer treatment has become a major clinical concern, affecting the long-term survival and quality of life of cancer patients. Effective clinical decision-making, including the detection of cancer treatment-induced cardiotoxicity and the monitoring of associated symptoms, remains a challenging task for clinicians. This study investigates the current practices and needs of clinicians in the clinical decision making of cancer treatment-induced cardiotoxicity and explores the potential of digital health technologies to support this process. 
Through semi-structured interviews with seven clinical experts, we identify a three-step decision-making paradigm: 1) symptom identification, 2) diagnostic testing and specialist collaboration, and 3) clinical decision-making and intervention. Our findings highlight the difficulties of diagnosing cardiotoxicity (absence of unified protocols and high variability in symptoms) and monitoring patient symptoms (lacking accurate and timely patient self-reported symptoms). The clinicians also expressed their need for effective early detection tools that can integrate remote patient monitoring capabilities. Based on these insights, we discuss the importance of understanding the dynamic nature of clinical workflows, and the design considerations for future digital tools to support cancer-treatment-induced cardiotoxicity decision-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03586v1-abstract-full').style.display = 'none'; document.getElementById('2408.03586v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00545">arXiv:2408.00545</a> <span> [<a href="https://arxiv.org/pdf/2408.00545">pdf</a>, <a href="https://arxiv.org/format/2408.00545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Collecting Larg-Scale Robotic Datasets on a High-Speed Mobile Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yuxin Lin</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiaxuan Ma</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+S">Sizhe Gu</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+J">Jipeng Kong</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+B">Bowen Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiting Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dengji Zhao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenhan Cao</a>, <a href="/search/cs?searchtype=author&query=Schwertfeger%2C+S">Sören Schwertfeger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00545v1-abstract-short" style="display: inline;"> Mobile robotics datasets are essential for research on robotics, for example for research on Simultaneous Localization and Mapping (SLAM). Therefore the ShanghaiTech Mapping Robot was constructed, that features a multitude high-performance sensors and a 16-node cluster to collect all this data. 
That robot is based on a Clearpath Husky mobile base with a maximum speed of 1 meter per second. This is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00545v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00545v1-abstract-full" style="display: none;"> Mobile robotics datasets are essential for research on robotics, for example for research on Simultaneous Localization and Mapping (SLAM). Therefore the ShanghaiTech Mapping Robot was constructed, that features a multitude high-performance sensors and a 16-node cluster to collect all this data. That robot is based on a Clearpath Husky mobile base with a maximum speed of 1 meter per second. This is fine for indoor datasets, but to collect large-scale outdoor datasets a faster platform is needed. This system paper introduces our high-speed mobile platform for data collection. The mapping robot is secured on the rear-steered flatbed car with maximum field of view. Additionally two encoders collect odometry data from two of the car wheels and an external sensor plate houses a downlooking RGB and event camera. With this setup a dataset of more than 10km in the underground parking garage and the outside of our campus was collected and is published with this paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00545v1-abstract-full').style.display = 'none'; document.getElementById('2408.00545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20770">arXiv:2407.20770</a> <span> [<a href="https://arxiv.org/pdf/2407.20770">pdf</a>, <a href="https://arxiv.org/format/2407.20770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Non-Bayesian Social Learning with Multiview Observations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sui%2C+D">Dongyan Sui</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weichen Cao</a>, <a href="/search/cs?searchtype=author&query=Vlaski%2C+S">Stefan Vlaski</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+C">Chun Guan</a>, <a href="/search/cs?searchtype=author&query=Leng%2C+S">Siyang Leng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20770v1-abstract-short" style="display: inline;"> Non-Bayesian social learning enables multiple agents to conduct networked signal and information processing through observing environmental signals and information aggregating. Traditional non-Bayesian social learning models only consider single signals, limiting their applications in scenarios where multiple viewpoints of information are available. 
In this work, we exploit, in the information agg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20770v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20770v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20770v1-abstract-full" style="display: none;"> Non-Bayesian social learning enables multiple agents to conduct networked signal and information processing through observing environmental signals and information aggregating. Traditional non-Bayesian social learning models only consider single signals, limiting their applications in scenarios where multiple viewpoints of information are available. In this work, we exploit, in the information aggregation step, the independently learned results from observations taken from multiple viewpoints and propose a novel non-Bayesian social learning model for scenarios with multiview observations. We prove the convergence of the model under traditional assumptions and provide convergence conditions for the algorithm in the presence of misleading signals. Through theoretical analyses and numerical experiments, we validate the strong reliability and robustness of the proposed algorithm, showcasing its potential for real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20770v1-abstract-full').style.display = 'none'; document.getElementById('2407.20770v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19976">arXiv:2407.19976</a> <span> [<a href="https://arxiv.org/pdf/2407.19976">pdf</a>, <a href="https://arxiv.org/format/2407.19976">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> MambaGesture: Enhancing Co-Speech Gesture Generation with Mamba and Disentangled Multi-Modality Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fu%2C+C">Chencan Fu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yabiao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhengkai Jiang</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+X">Xiaofeng Mao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiafu Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yanhao Ge</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19976v2-abstract-short" style="display: inline;"> Co-speech gesture generation is crucial for producing synchronized and realistic human gestures that accompany speech, enhancing the animation of lifelike avatars in virtual environments. 
While diffusion models have shown impressive capabilities, current approaches often overlook a wide range of modalities and their interactions, resulting in less dynamic and contextually varied gestures. To addre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19976v2-abstract-full').style.display = 'inline'; document.getElementById('2407.19976v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19976v2-abstract-full" style="display: none;"> Co-speech gesture generation is crucial for producing synchronized and realistic human gestures that accompany speech, enhancing the animation of lifelike avatars in virtual environments. While diffusion models have shown impressive capabilities, current approaches often overlook a wide range of modalities and their interactions, resulting in less dynamic and contextually varied gestures. To address these challenges, we present MambaGesture, a novel framework integrating a Mamba-based attention block, MambaAttn, with a multi-modality feature fusion module, SEAD. The MambaAttn block combines the sequential data processing strengths of the Mamba model with the contextual richness of attention mechanisms, enhancing the temporal coherence of generated gestures. SEAD adeptly fuses audio, text, style, and emotion modalities, employing disentanglement to deepen the fusion process and yield gestures with greater realism and diversity. Our approach, rigorously evaluated on the multi-modal BEAT dataset, demonstrates significant improvements in Fréchet Gesture Distance (FGD), diversity scores, and beat alignment, achieving state-of-the-art performance in co-speech gesture generation. Project website: $\href{https://fcchit.github.io/mambagesture/}{\textit{https://fcchit.github.io/mambagesture/}}$. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19976v2-abstract-full').style.display = 'none'; document.getElementById('2407.19976v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19150">arXiv:2407.19150</a> <span> [<a href="https://arxiv.org/pdf/2407.19150">pdf</a>, <a href="https://arxiv.org/format/2407.19150">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> RoSE-Opt: Robust and Efficient Analog Circuit Parameter Optimization with Knowledge-infused Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weidong Cao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jian Gao</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tianrui Ma</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+R">Rui Ma</a>, <a href="/search/cs?searchtype=author&query=Benosman%2C+M">Mouhacine Benosman</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2407.19150v1-abstract-short" style="display: inline;"> This paper proposes a learning framework, RoSE-Opt, to achieve robust and efficient analog circuit parameter optimization. RoSE-Opt has two important features. First, it incorporates key domain knowledge of analog circuit design, such as circuit topology, couplings between circuit specifications, and variations of process, supply voltage, and temperature, into the learning loop. This strategy faci… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19150v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19150v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19150v1-abstract-full" style="display: none;"> This paper proposes a learning framework, RoSE-Opt, to achieve robust and efficient analog circuit parameter optimization. RoSE-Opt has two important features. First, it incorporates key domain knowledge of analog circuit design, such as circuit topology, couplings between circuit specifications, and variations of process, supply voltage, and temperature, into the learning loop. This strategy facilitates the training of an artificial agent capable of achieving design goals by identifying device parameters that are optimal and robust. Second, it exploits a two-level optimization method, that is, integrating Bayesian optimization (BO) with reinforcement learning (RL) to improve sample efficiency. In particular, BO is used for a coarse yet quick search of an initial starting point for optimization. This sets a solid foundation to efficiently train the RL agent with fewer samples. Experimental evaluations on benchmarking circuits show promising sample efficiency, extraordinary figure-of-merit in terms of design efficiency and design success rate, and Pareto optimality in circuit performance of our framework, compared to previous methods. 
Furthermore, this work thoroughly studies the performance of different RL optimization algorithms, such as Deep Deterministic Policy Gradients (DDPG) with an off-policy learning mechanism and Proximal Policy Optimization (PPO) with an on-policy learning mechanism. This investigation provides users with guidance on choosing the appropriate RL algorithms to optimize the device parameters of analog circuits. Finally, our study also demonstrates RoSE-Opt's promise in parasitic-aware device optimization for analog circuits. In summary, our work reports a knowledge-infused BO-RL design automation framework for reliable and efficient optimization of analog circuits' device parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19150v1-abstract-full').style.display = 'none'; document.getElementById('2407.19150v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 12 Figures. 
Accepted by IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14100">arXiv:2407.14100</a> <span> [<a href="https://arxiv.org/pdf/2407.14100">pdf</a>, <a href="https://arxiv.org/format/2407.14100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ParamsDrag: Interactive Parameter Space Exploration via Image-Space Dragging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+G">Guan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+G">Guihua Shan</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+S">Shiyu Cheng</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weiqun Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junpeng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Ko-Chih Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14100v1-abstract-short" style="display: inline;"> Numerical simulation serves as a cornerstone in scientific modeling, yet the process of fine-tuning simulation parameters poses significant challenges. Conventionally, parameter adjustment relies on extensive numerical simulations, data analysis, and expert insights, resulting in substantial computational costs and low efficiency. 
The emergence of deep learning in recent years has provided promisi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14100v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14100v1-abstract-full" style="display: none;"> Numerical simulation serves as a cornerstone in scientific modeling, yet the process of fine-tuning simulation parameters poses significant challenges. Conventionally, parameter adjustment relies on extensive numerical simulations, data analysis, and expert insights, resulting in substantial computational costs and low efficiency. The emergence of deep learning in recent years has provided promising avenues for more efficient exploration of parameter spaces. However, existing approaches often lack intuitive methods for precise parameter adjustment and optimization. To tackle these challenges, we introduce ParamsDrag, a model that facilitates parameter space exploration through direct interaction with visualizations. Inspired by DragGAN, our ParamsDrag model operates in three steps. First, the generative component of ParamsDrag generates visualizations based on the input simulation parameters. Second, by directly dragging structure-related features in the visualizations, users can intuitively understand the controlling effect of different parameters. Third, with the understanding from the earlier step, users can steer ParamsDrag to produce dynamic visual outcomes. Through experiments conducted on real-world simulations and comparisons with state-of-the-art deep learning-based approaches, we demonstrate the efficacy of our solution. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14100v1-abstract-full').style.display = 'none'; document.getElementById('2407.14100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in Proc. IEEE VIS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03374">arXiv:2407.03374</a> <span> [<a href="https://arxiv.org/pdf/2407.03374">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> An Outline of Prognostics and Health Management Large Model: Concepts, Paradigms, and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tao%2C+L">Laifa Tao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shangyu Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haifei Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Liang Ma</a>, <a href="/search/cs?searchtype=author&query=Ning%2C+G">Guoao Ning</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+Y">Yiling Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yunlong Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bin Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Weiwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhengduo Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+W">Wenchao Zhan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenyan Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongmei Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jian Ma</a>, <a href="/search/cs?searchtype=author&query=Suo%2C+M">Mingliang Suo</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yujie Cheng</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yu Ding</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dengwei Song</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chen Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03374v1-abstract-short" style="display: inline;"> Prognosis and Health Management (PHM), critical for ensuring task completion by complex systems and preventing unexpected failures, is widely adopted in aerospace, manufacturing, maritime, rail, energy, etc. However, PHM's development is constrained by bottlenecks like generalization, interpretation and verification abilities. 
Presently, generative artificial intelligence (AI), represented by Larg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03374v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03374v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03374v1-abstract-full" style="display: none;"> Prognosis and Health Management (PHM), critical for ensuring task completion by complex systems and preventing unexpected failures, is widely adopted in aerospace, manufacturing, maritime, rail, energy, etc. However, PHM's development is constrained by bottlenecks like generalization, interpretation and verification abilities. Presently, generative artificial intelligence (AI), represented by Large Model, heralds a technological revolution with the potential to fundamentally reshape traditional technological fields and human production methods. Its capabilities, including strong generalization, reasoning, and generative attributes, present opportunities to address PHM's bottlenecks. To this end, based on a systematic analysis of the current challenges and bottlenecks in PHM, as well as the research status and advantages of Large Model, we propose a novel concept and three progressive paradigms of Prognosis and Health Management Large Model (PHM-LM) through the integration of the Large Model with PHM. Subsequently, we provide feasible technical approaches for PHM-LM to bolster PHM's core capabilities within the framework of the three paradigms. Moreover, to address core issues confronting PHM, we discuss a series of technical challenges of PHM-LM throughout the entire process of construction and application. 
This comprehensive effort offers a holistic PHM-LM technical framework, and provides avenues for new PHM technologies, methodologies, tools, platforms and applications, which also potentially innovates design, research & development, verification and application mode of PHM. And furthermore, a new generation of PHM with AI will also capably be realized, i.e., from custom to generalized, from discriminative to generative, and from theoretical conditions to practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03374v1-abstract-full').style.display = 'none'; document.getElementById('2407.03374v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18204">arXiv:2406.18204</a> <span> [<a href="https://arxiv.org/pdf/2406.18204">pdf</a>, <a href="https://arxiv.org/format/2406.18204">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Analysis of Channel Uncertainty in Trusted Wireless Services via Repeated Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bingwen Chen</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+X">Xintong Ling</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weihang Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiaheng Wang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhi Ding</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18204v2-abstract-short" style="display: inline;"> The coexistence of heterogeneous sub-networks in 6G poses new security and trust concerns and thus calls for a perimeterless-security model. Blockchain radio access network (B-RAN) provides a trust-building approach via repeated interactions rather than relying on pre-established trust or central authentication. Such a trust-building process naturally supports dynamic trusted services across vario… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18204v2-abstract-full').style.display = 'inline'; document.getElementById('2406.18204v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18204v2-abstract-full" style="display: none;"> The coexistence of heterogeneous sub-networks in 6G poses new security and trust concerns and thus calls for a perimeterless-security model. Blockchain radio access network (B-RAN) provides a trust-building approach via repeated interactions rather than relying on pre-established trust or central authentication. Such a trust-building process naturally supports dynamic trusted services across various service providers (SP) without the need for perimeter-based authentications; however, it remains vulnerable to environmental and system unreliability such as wireless channel uncertainty. In this study, we investigate channel unreliability in the trust-building framework based on repeated interactions for secure wireless services. We derive specific requirements for achieving cooperation between SP and client via a repeated game model and illustrate the implications of channel unreliability on sustaining trusted access services. We consider the framework optimization to guarantee SP-client cooperation, given a worst-case channel condition. 
Furthermore, we introduce the concept of cooperation region to represent the robustness of the trust-building process and explore the maximum cooperation area to enhance service resilience. Finally, we present simulations to demonstrate the system performance over fading channels and verify our results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18204v2-abstract-full').style.display = 'none'; document.getElementById('2406.18204v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16710">arXiv:2406.16710</a> <span> [<a href="https://arxiv.org/pdf/2406.16710">pdf</a>, <a href="https://arxiv.org/format/2406.16710">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ID-Sculpt: ID-aware 3D Head Generation from Single In-the-wild Portrait Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jinkun Hao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Junshu Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+R">Ran Yi</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yijia Hong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Moran Li</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+Y">Yating Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lizhuang Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16710v3-abstract-short" style="display: inline;"> While recent works have achieved great success on image-to-3D object generation, high quality and fidelity 3D head generation from a single image remains a great challenge. Previous text-based methods for generating 3D heads were limited by text descriptions and image-based methods struggled to produce high-quality head geometry. To handle this challenging problem, we propose a novel framework, ID… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16710v3-abstract-full').style.display = 'inline'; document.getElementById('2406.16710v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16710v3-abstract-full" style="display: none;"> While recent works have achieved great success on image-to-3D object generation, high quality and fidelity 3D head generation from a single image remains a great challenge. Previous text-based methods for generating 3D heads were limited by text descriptions and image-based methods struggled to produce high-quality head geometry. To handle this challenging problem, we propose a novel framework, ID-Sculpt, to generate high-quality 3D heads while preserving their identities. Our work incorporates the identity information of the portrait image into three parts: 1) geometry initialization, 2) geometry sculpting, and 3) texture generation stages. 
Given a reference portrait image, we first align the identity features with text features to realize ID-aware guidance enhancement, which contains the control signals representing the face information. We then use the canny map, ID features of the portrait image, and a pre-trained text-to-normal/depth diffusion model to generate ID-aware geometry supervision, and 3D-GAN inversion is employed to generate ID-aware geometry initialization. Furthermore, with the ability to inject identity information into 3D head generation, we use ID-aware guidance to calculate ID-aware Score Distillation (ISD) for geometry sculpting. For texture generation, we adopt the ID Consistent Texture Inpainting and Refinement which progressively expands the view for texture inpainting to obtain an initialization UV texture map. We then use the ID-aware guidance to provide image-level supervision for noisy multi-view images to obtain a refined texture map. Extensive experiments demonstrate that we can generate high-quality 3D heads with accurate geometry and texture from a single in-the-wild portrait image. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16710v3-abstract-full').style.display = 'none'; document.getElementById('2406.16710v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2025; Project page: https://jinkun-hao.github.io/ID-Sculpt/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10999">arXiv:2406.10999</a> <span> [<a href="https://arxiv.org/pdf/2406.10999">pdf</a>, <a href="https://arxiv.org/format/2406.10999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Balancing Rigor and Utility: Mitigating Cognitive Biases in Large Language Models for Multiple-Choice Questions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liman Wang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+H">Hanyang Zhong</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenting Cao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zeyuan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10999v3-abstract-short" style="display: inline;"> This paper examines the role of cognitive biases in the decision-making processes of large language models (LLMs), challenging the conventional goal of eliminating all biases. We show that certain cognitive biases when properly balanced, can enhance decision-making efficiency through rational deviations and heuristic shortcuts. 
By introducing heuristic moderation and an abstention option, which al… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10999v3-abstract-full').style.display = 'inline'; document.getElementById('2406.10999v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10999v3-abstract-full" style="display: none;"> This paper examines the role of cognitive biases in the decision-making processes of large language models (LLMs), challenging the conventional goal of eliminating all biases. We show that certain cognitive biases when properly balanced, can enhance decision-making efficiency through rational deviations and heuristic shortcuts. By introducing heuristic moderation and an abstention option, which allows LLMs to withhold responses when uncertain, we reduce error rates, improve decision accuracy, and optimize decision rates. Using the Balance Rigor and Utility (BRU) dataset, developed through expert collaboration, our findings demonstrate that targeted inspection of cognitive biases aligns LLM decisions more closely with human reasoning, enhancing reliability and suggesting strategies for future improvements. This approach offers a novel way to leverage cognitive biases to improve the practical utility of LLMs across various applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10999v3-abstract-full').style.display = 'none'; document.getElementById('2406.10999v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This article is currently under review. All data will be open on GitHub once the review is complete. https://github.com/limanwang/Balancing-Rigor-and-Utility</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01380">arXiv:2406.01380</a> <span> [<a href="https://arxiv.org/pdf/2406.01380">pdf</a>, <a href="https://arxiv.org/format/2406.01380">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Convolutional Unscented Kalman Filter for Multi-Object Tracking with Outliers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shiqi Liu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenhan Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+E">Shengbo Eben Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01380v2-abstract-short" style="display: inline;"> Multi-object tracking (MOT) is an essential technique for navigation in autonomous driving. In tracking-by-detection systems, biases, false positives, and misses, which are referred to as outliers, are inevitable due to complex traffic scenarios. 
Recent tracking methods are based on filtering algorithms that overlook these outliers, leading to reduced tracking accuracy or even loss of the object's… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01380v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01380v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01380v2-abstract-full" style="display: none;"> Multi-object tracking (MOT) is an essential technique for navigation in autonomous driving. In tracking-by-detection systems, biases, false positives, and misses, which are referred to as outliers, are inevitable due to complex traffic scenarios. Recent tracking methods are based on filtering algorithms that overlook these outliers, leading to reduced tracking accuracy or even loss of the object's trajectory. To handle this challenge, we adopt a probabilistic perspective, regarding the generation of outliers as misspecification between the actual distribution of measurement data and the nominal measurement model used for filtering. We further demonstrate that, by designing a convolutional operation, we can mitigate this misspecification. Incorporating this operation into the widely used unscented Kalman filter (UKF) in commonly adopted tracking algorithms, we derive a variant of the UKF that is robust to outliers, called the convolutional UKF (ConvUKF). We show that ConvUKF maintains the Gaussian conjugate property, thus allowing for real-time tracking. We also prove that ConvUKF has a bounded tracking error in the presence of outliers, which implies robust stability. The experimental results on the KITTI and nuScenes datasets show improved accuracy compared to representative baseline algorithms for MOT tasks.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01380v2-abstract-full').style.display = 'none'; document.getElementById('2406.01380v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Intelligent Vehicles</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19027">arXiv:2405.19027</a> <span> [<a href="https://arxiv.org/pdf/2405.19027">pdf</a>, <a href="https://arxiv.org/format/2405.19027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Optimization-based Proof of Useful Work: Framework, Modeling, and Security Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weihang Cao</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+X">Xintong Ling</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiaheng Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhi Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19027v2-abstract-short" style="display: inline;"> Proof of Work 
(PoW) has extensively served as the foundation of blockchain's security, consistency, and tamper-resistance. However, long has it been criticized for its tremendous and inefficient utilization of computational power and energy. Proof of useful work (PoUW) can effectively address the blockchain's sustainability issue by redirecting the computing power towards useful tasks instead of m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19027v2-abstract-full').style.display = 'inline'; document.getElementById('2405.19027v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19027v2-abstract-full" style="display: none;"> Proof of Work (PoW) has extensively served as the foundation of blockchain's security, consistency, and tamper-resistance. However, long has it been criticized for its tremendous and inefficient utilization of computational power and energy. Proof of useful work (PoUW) can effectively address the blockchain's sustainability issue by redirecting the computing power towards useful tasks instead of meaningless hash puzzles. Optimization problems, whose solutions are often hard to find but easy to verify, present a viable class of useful work for PoUW. However, most existing studies rely on either specific problems or particular algorithms, and there lacks comprehensive security analysis for optimization-based PoUW. Therefore, in this work, we build a generic PoUW framework that solves useful optimization problems for blockchain maintenance. Through modeling and analysis, we identify the security conditions against both selfish and malicious miners. Based on these conditions, we establish a lower bound for the security overhead and uncover the trade-off between useful work efficiency and PoW safeguard. We further offer the reward function design guidelines to guarantee miners' integrity. 
We also show that the optimization-based PoUW is secure in the presence of malicious miners and derive a necessary condition against long-range attacks. Finally, simulation results are presented to validate our analytical results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19027v2-abstract-full').style.display = 'none'; document.getElementById('2405.19027v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18156">arXiv:2405.18156</a> <span> [<a href="https://arxiv.org/pdf/2405.18156">pdf</a>, <a href="https://arxiv.org/format/2405.18156">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VividPose: Advancing Stable Video Diffusion for Realistic Human Image Animation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qilin Wang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhengkai Jiang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+C">Chengming Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yabiao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yun Cao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, 
<a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yanwei Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18156v1-abstract-short" style="display: inline;"> Human image animation involves generating a video from a static image by following a specified pose sequence. Current approaches typically adopt a multi-stage pipeline that separately learns appearance and motion, which often leads to appearance degradation and temporal inconsistencies. To address these issues, we propose VividPose, an innovative end-to-end pipeline based on Stable Video Diffusion… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18156v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18156v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18156v1-abstract-full" style="display: none;"> Human image animation involves generating a video from a static image by following a specified pose sequence. Current approaches typically adopt a multi-stage pipeline that separately learns appearance and motion, which often leads to appearance degradation and temporal inconsistencies. To address these issues, we propose VividPose, an innovative end-to-end pipeline based on Stable Video Diffusion (SVD) that ensures superior temporal stability. To enhance the retention of human identity, we propose an identity-aware appearance controller that integrates additional facial information without compromising other appearance details such as clothing texture and background. This approach ensures that the generated videos maintain high fidelity to the identity of human subject, preserving key facial features across various poses. 
To accommodate diverse human body shapes and hand movements, we introduce a geometry-aware pose controller that utilizes both dense rendering maps from SMPL-X and sparse skeleton maps. This enables accurate alignment of pose and shape in the generated videos, providing a robust framework capable of handling a wide range of body shapes and dynamic hand movements. Extensive qualitative and quantitative experiments on the UBCFashion and TikTok benchmarks demonstrate that our method achieves state-of-the-art performance. Furthermore, VividPose exhibits superior generalization capabilities on our proposed in-the-wild dataset. Codes and models will be available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18156v1-abstract-full').style.display = 'none'; document.getElementById('2405.18156v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.15763">arXiv:2405.15763</a> <span> [<a href="https://arxiv.org/pdf/2405.15763">pdf</a>, <a href="https://arxiv.org/format/2405.15763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FreeMotion: A Unified Framework for Number-free Text-to-Motion Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+K">Ke Fan</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Junshu Tang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+R">Ran Yi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Moran Li</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Jingyu Gong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yabiao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lizhuang Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.15763v1-abstract-short" style="display: inline;"> Text-to-motion synthesis is a crucial task in computer vision. Existing methods are limited in their universality, as they are tailored for single-person or two-person scenarios and can not be applied to generate motions for more individuals. 
To achieve the number-free motion synthesis, this paper reconsiders motion generation and proposes to unify the single and multi-person motion by the conditi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15763v1-abstract-full').style.display = 'inline'; document.getElementById('2405.15763v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.15763v1-abstract-full" style="display: none;"> Text-to-motion synthesis is a crucial task in computer vision. Existing methods are limited in their universality, as they are tailored for single-person or two-person scenarios and can not be applied to generate motions for more individuals. To achieve the number-free motion synthesis, this paper reconsiders motion generation and proposes to unify the single and multi-person motion by the conditional motion distribution. Furthermore, a generation module and an interaction module are designed for our FreeMotion framework to decouple the process of conditional motion generation and finally support the number-free motion synthesis. Besides, based on our framework, the current single-person motion spatial control method could be seamlessly integrated, achieving precise control of multi-person motion. Extensive experiments demonstrate the superior performance of our method and our capability to infer single and multi-human motions simultaneously. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15763v1-abstract-full').style.display = 'none'; document.getElementById('2405.15763v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.03008">arXiv:2405.03008</a> <span> [<a href="https://arxiv.org/pdf/2405.03008">pdf</a>, <a href="https://arxiv.org/format/2405.03008">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DVMSR: Distillated Vision Mamba for Efficient Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+X">Xiaoyan Lei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenlong Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weifeng Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.03008v2-abstract-short" style="display: inline;"> Efficient Image Super-Resolution (SR) aims to accelerate SR network inference by minimizing computational complexity and network parameters while preserving performance. Existing state-of-the-art Efficient Image Super-Resolution methods are based on convolutional neural networks. 
Few attempts have been made with Mamba to harness its long-range modeling capability and efficient computational comple… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03008v2-abstract-full').style.display = 'inline'; document.getElementById('2405.03008v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.03008v2-abstract-full" style="display: none;"> Efficient Image Super-Resolution (SR) aims to accelerate SR network inference by minimizing computational complexity and network parameters while preserving performance. Existing state-of-the-art Efficient Image Super-Resolution methods are based on convolutional neural networks. Few attempts have been made with Mamba to harness its long-range modeling capability and efficient computational complexity, which have shown impressive performance on high-level vision tasks. In this paper, we propose DVMSR, a novel lightweight Image SR network that incorporates Vision Mamba and a distillation strategy. The network of DVMSR consists of three modules: feature extraction convolution, multiple stacked Residual State Space Blocks (RSSBs), and a reconstruction module. Specifically, the deep feature extraction module is composed of several residual state space blocks (RSSB), each of which has several Vision Mamba Modules (ViMM) together with a residual connection. To achieve efficiency improvement while maintaining comparable performance, we employ a distillation strategy to the vision Mamba network for superior performance. Specifically, we leverage the rich representation knowledge of teacher network as additional supervision for the output of lightweight student networks. Extensive experiments have demonstrated that our proposed DVMSR can outperform state-of-the-art efficient SR methods in terms of model parameters while maintaining the performance of both PSNR and SSIM.
The source code is available at https://github.com/nathan66666/DVMSR.git <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03008v2-abstract-full').style.display = 'none'; document.getElementById('2405.03008v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00027">arXiv:2405.00027</a> <span> [<a href="https://arxiv.org/pdf/2405.00027">pdf</a>, <a href="https://arxiv.org/format/2405.00027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5220/0012431300003660">10.5220/0012431300003660 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multidimensional Compressed Sensing for Spectral Light 
Field Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wen Cao</a>, <a href="/search/cs?searchtype=author&query=Miandji%2C+E">Ehsan Miandji</a>, <a href="/search/cs?searchtype=author&query=Unger%2C+J">Jonas Unger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00027v1-abstract-short" style="display: inline;"> This paper considers a compressive multi-spectral light field camera model that utilizes a one-hot spectral-coded mask and a microlens array to capture spatial, angular, and spectral information using a single monochrome sensor. We propose a model that employs compressed sensing techniques to reconstruct the complete multi-spectral light field from undersampled measurements. Unlike previous work wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00027v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00027v1-abstract-full" style="display: none;"> This paper considers a compressive multi-spectral light field camera model that utilizes a one-hot spectral-coded mask and a microlens array to capture spatial, angular, and spectral information using a single monochrome sensor. We propose a model that employs compressed sensing techniques to reconstruct the complete multi-spectral light field from undersampled measurements. Unlike previous work where a light field is vectorized to a 1D signal, our method employs a 5D basis and a novel 5D measurement model, hence, matching the intrinsic dimensionality of multispectral light fields.
We mathematically and empirically show the equivalence of 5D and 1D sensing models, and most importantly that the 5D framework achieves orders of magnitude faster reconstruction while requiring a small fraction of the memory. Moreover, our new multidimensional sensing model opens new research directions for designing efficient visual data acquisition algorithms and hardware. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00027v1-abstract-full').style.display = 'none'; document.getElementById('2405.00027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, published at VISAPP 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In Proceedings of the 19th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications - Volume 4: VISAPP 2024, ISBN 978-989-758-679-8, ISSN 2184-4321, pages 349-356 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16687">arXiv:2404.16687</a> <span> [<a href="https://arxiv.org/pdf/2404.16687">pdf</a>, <a href="https://arxiv.org/format/2404.16687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NTIRE 2024 Quality Assessment of AI-Generated Content Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a
href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Min%2C+X">Xiongkuo Min</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chunyi Li</a>, <a href="/search/cs?searchtype=author&query=Kou%2C+T">Tengchuan Kou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wei Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haoning Wu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yixuan Gao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuqin Cao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zicheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiele Wu</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+F">Fei Peng</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+H">Huiyuan Fu</a>, <a href="/search/cs?searchtype=author&query=Ming%2C+A">Anlong Ming</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chuanming Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+H">Huadong Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Shuai He</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+Z">Zifei Dou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huacong Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+H">Haiyi Xie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengwei Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Baoying Chen</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+J">Jishen Zeng</a> , et al. 
(89 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16687v2-abstract-short" style="display: inline;"> This paper reports on the NTIRE 2024 Quality Assessment of AI-Generated Content Challenge, which will be held in conjunction with the New Trends in Image Restoration and Enhancement Workshop (NTIRE) at CVPR 2024. This challenge is to address a major challenge in the field of image and video processing, namely, Image Quality Assessment (IQA) and Video Quality Assessment (VQA) for AI-Generated Conte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16687v2-abstract-full').style.display = 'inline'; document.getElementById('2404.16687v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16687v2-abstract-full" style="display: none;"> This paper reports on the NTIRE 2024 Quality Assessment of AI-Generated Content Challenge, which will be held in conjunction with the New Trends in Image Restoration and Enhancement Workshop (NTIRE) at CVPR 2024. This challenge is to address a major challenge in the field of image and video processing, namely, Image Quality Assessment (IQA) and Video Quality Assessment (VQA) for AI-Generated Content (AIGC). The challenge is divided into the image track and the video track. The image track uses the AIGIQA-20K, which contains 20,000 AI-Generated Images (AIGIs) generated by 15 popular generative models. The image track has a total of 318 registered participants. A total of 1,646 submissions are received in the development phase, and 221 submissions are received in the test phase. Finally, 16 participating teams submitted their models and fact sheets. 
The video track uses the T2VQA-DB, which contains 10,000 AI-Generated Videos (AIGVs) generated by 9 popular Text-to-Video (T2V) models. A total of 196 participants have registered in the video track. A total of 991 submissions are received in the development phase, and 185 submissions are received in the test phase. Finally, 12 participating teams submitted their models and fact sheets. Some methods have achieved better results than baseline methods, and the winning methods in both tracks have demonstrated superior prediction performance on AIGC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16687v2-abstract-full').style.display = 'none'; document.getElementById('2404.16687v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10343">arXiv:2404.10343</a> <span> [<a href="https://arxiv.org/pdf/2404.10343">pdf</a>, <a href="https://arxiv.org/format/2404.10343">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> The Ninth NTIRE 2024 Efficient Super-Resolution Challenge Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+B">Bin Ren</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yawei Li</a>, <a href="/search/cs?searchtype=author&query=Mehta%2C+N">Nancy Mehta</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongyuan Yu</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+C">Cheng Wan</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yuxin Hong</a>, <a href="/search/cs?searchtype=author&query=Han%2C+B">Bingnan Han</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhuoyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+Y">Yajun Zou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuqing Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jizhe Li</a>, <a href="/search/cs?searchtype=author&query=He%2C+K">Keji He</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+C">Chao Fan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Heng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaolin Zhang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xuanwu Yin</a>, <a 
href="/search/cs?searchtype=author&query=Zuo%2C+K">Kunlong Zuo</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+B">Bohao Liao</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+P">Peizhe Xia</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+L">Long Peng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zhibo Du</a>, <a href="/search/cs?searchtype=author&query=Di%2C+X">Xin Di</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wangkai Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yang Wang</a> , et al. (109 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10343v2-abstract-short" style="display: inline;"> This paper provides a comprehensive review of the NTIRE 2024 challenge, focusing on efficient single-image super-resolution (ESR) solutions and their outcomes. The task of this challenge is to super-resolve an input image with a magnification factor of x4 based on pairs of low and corresponding high-resolution images. The primary objective is to develop networks that optimize various aspects such… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10343v2-abstract-full').style.display = 'inline'; document.getElementById('2404.10343v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10343v2-abstract-full" style="display: none;"> This paper provides a comprehensive review of the NTIRE 2024 challenge, focusing on efficient single-image super-resolution (ESR) solutions and their outcomes. The task of this challenge is to super-resolve an input image with a magnification factor of x4 based on pairs of low and corresponding high-resolution images. 
The primary objective is to develop networks that optimize various aspects such as runtime, parameters, and FLOPs, while still maintaining a peak signal-to-noise ratio (PSNR) of approximately 26.90 dB on the DIV2K_LSDIR_valid dataset and 26.99 dB on the DIV2K_LSDIR_test dataset. In addition, this challenge has 4 tracks including the main track (overall performance), sub-track 1 (runtime), sub-track 2 (FLOPs), and sub-track 3 (parameters). In the main track, all three metrics (ie runtime, FLOPs, and parameter count) were considered. The ranking of the main track is calculated based on a weighted sum-up of the scores of all other sub-tracks. In sub-track 1, the practical runtime performance of the submissions was evaluated, and the corresponding score was used to determine the ranking. In sub-track 2, the number of FLOPs was considered. The score calculated based on the corresponding FLOPs was used to determine the ranking. In sub-track 3, the number of parameters was considered. The score calculated based on the corresponding parameters was used to determine the ranking. RLFN is set as the baseline for efficiency measurement. The challenge had 262 registered participants, and 34 teams made valid submissions. They gauge the state-of-the-art in efficient single-image super-resolution. To facilitate the reproducibility of the challenge and enable other researchers to build upon these findings, the code and the pre-trained model of validated solutions are made publicly available at https://github.com/Amazingren/NTIRE2024_ESR/. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10343v2-abstract-full').style.display = 'none'; document.getElementById('2404.10343v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The report paper of NTIRE2024 Efficient Super-resolution, accepted by CVPRW2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.04936">arXiv:2404.04936</a> <span> [<a href="https://arxiv.org/pdf/2404.04936">pdf</a>, <a href="https://arxiv.org/format/2404.04936">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Bootstrapping Chest CT Image Understanding by Distilling Knowledge from X-ray Expert Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weiwei Cao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianpeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingda Xia</a>, <a href="/search/cs?searchtype=author&query=Mok%2C+T+C+W">Tony C. W. 
Mok</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zi Li</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xianghua Ye</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jian Zheng</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yuxing Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Ling Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.04936v1-abstract-short" style="display: inline;"> Radiologists highly desire fully automated versatile AI for medical imaging interpretation. However, the lack of extensively annotated large-scale multi-disease datasets has hindered the achievement of this goal. In this paper, we explore the feasibility of leveraging language as a naturally high-quality supervision for chest CT imaging. In light of the limited availability of image-report pairs,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04936v1-abstract-full').style.display = 'inline'; document.getElementById('2404.04936v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.04936v1-abstract-full" style="display: none;"> Radiologists highly desire fully automated versatile AI for medical imaging interpretation. However, the lack of extensively annotated large-scale multi-disease datasets has hindered the achievement of this goal. In this paper, we explore the feasibility of leveraging language as a naturally high-quality supervision for chest CT imaging. In light of the limited availability of image-report pairs, we bootstrap the understanding of 3D chest CT images by distilling chest-related diagnostic knowledge from an extensively pre-trained 2D X-ray expert model. 
Specifically, we propose a language-guided retrieval method to match each 3D CT image with its semantically closest 2D X-ray image, and perform pair-wise and semantic relation knowledge distillation. Subsequently, we use contrastive learning to align images and reports within the same patient while distinguishing them from the other patients. However, the challenge arises when patients have similar semantic diagnoses, such as healthy patients, potentially confusing if treated as negatives. We introduce a robust contrastive learning that identifies and corrects these false negatives. We train our model with over 12,000 pairs of chest CT images and radiology reports. Extensive experiments across multiple scenarios, including zero-shot learning, report generation, and fine-tuning processes, demonstrate the model's feasibility in interpreting chest CT images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04936v1-abstract-full').style.display = 'none'; document.getElementById('2404.04936v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00481">arXiv:2404.00481</a> <span> [<a href="https://arxiv.org/pdf/2404.00481">pdf</a>, <a href="https://arxiv.org/format/2404.00481">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Convolutional Bayesian Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wenhan Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shiqi Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Z">Zeyu He</a>, <a href="/search/cs?searchtype=author&query=Yau%2C+S+S+-">Stephen S. -T. Yau</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+E">Shengbo Eben Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00481v1-abstract-short" style="display: inline;"> Bayesian filtering serves as the mainstream framework of state estimation in dynamic systems. Its standard version utilizes total probability rule and Bayes' law alternatively, where how to define and compute conditional probability is critical to state distribution inference. 
Previously, the conditional probability is assumed to be exactly known, which represents a measure of the occurrence proba… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00481v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00481v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00481v1-abstract-full" style="display: none;"> Bayesian filtering serves as the mainstream framework of state estimation in dynamic systems. Its standard version utilizes total probability rule and Bayes' law alternatively, where how to define and compute conditional probability is critical to state distribution inference. Previously, the conditional probability is assumed to be exactly known, which represents a measure of the occurrence probability of one event, given the second event. In this paper, we find that by adding an additional event that stipulates an inequality condition, we can transform the conditional probability into a special integration that is analogous to convolution. Based on this transformation, we show that both transition probability and output probability can be generalized to convolutional forms, resulting in a more general filtering framework that we call convolutional Bayesian filtering. This new framework encompasses standard Bayesian filtering as a special case when the distance metric of the inequality condition is selected as Dirac delta function. It also allows for a more nuanced consideration of model mismatch by choosing different types of inequality conditions. For instance, when the distance metric is defined in a distributional sense, the transition probability and output probability can be approximated by simply rescaling them into fractional powers. 
Under this framework, a robust version of Kalman filter can be constructed by only altering the noise covariance matrix, while maintaining the conjugate nature of Gaussian distributions. Finally, we exemplify the effectiveness of our approach by reshaping classic filtering algorithms into convolutional versions, including Kalman filter, extended Kalman filter, unscented Kalman filter and particle filter. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00481v1-abstract-full').style.display = 'none'; document.getElementById('2404.00481v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.17664">arXiv:2403.17664</a> <span> [<a href="https://arxiv.org/pdf/2403.17664">pdf</a>, <a href="https://arxiv.org/format/2403.17664">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DiffFAE: Advancing High-fidelity One-shot Facial Appearance Editing with Space-sensitive Customization and Semantic Preservation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qilin Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+C">Chengming Xu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, <a href="/search/cs?searchtype=author&query=Tai%2C+Y">Ying Tai</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yue Han</a>, <a 
href="/search/cs?searchtype=author&query=Ge%2C+Y">Yanhao Ge</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+H">Hong Gu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yanwei Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.17664v1-abstract-short" style="display: inline;"> Facial Appearance Editing (FAE) aims to modify physical attributes, such as pose, expression and lighting, of human facial images while preserving attributes like identity and background, showing great importance in photograph. In spite of the great progress in this area, current researches generally meet three challenges: low generation fidelity, poor attribute preservation, and inefficient infer… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17664v1-abstract-full').style.display = 'inline'; document.getElementById('2403.17664v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.17664v1-abstract-full" style="display: none;"> Facial Appearance Editing (FAE) aims to modify physical attributes, such as pose, expression and lighting, of human facial images while preserving attributes like identity and background, showing great importance in photograph. In spite of the great progress in this area, current researches generally meet three challenges: low generation fidelity, poor attribute preservation, and inefficient inference. To overcome above challenges, this paper presents DiffFAE, a one-stage and highly-efficient diffusion-based framework tailored for high-fidelity FAE. 
For high-fidelity query attributes transfer, we adopt Space-sensitive Physical Customization (SPC), which ensures the fidelity and generalization ability by utilizing rendering texture derived from 3D Morphable Model (3DMM). In order to preserve source attributes, we introduce the Region-responsive Semantic Composition (RSC). This module is guided to learn decoupled source-regarding features, thereby better preserving the identity and alleviating artifacts from non-facial attributes such as hair, clothes, and background. We further introduce a consistency regularization for our pipeline to enhance editing controllability by leveraging prior knowledge in the attention matrices of diffusion model. Extensive experiments demonstrate the superiority of DiffFAE over existing methods, achieving state-of-the-art performance in facial appearance editing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17664v1-abstract-full').style.display = 'none'; document.getElementById('2403.17664v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12906">arXiv:2403.12906</a> <span> [<a href="https://arxiv.org/pdf/2403.12906">pdf</a>, <a href="https://arxiv.org/format/2403.12906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TexDreamer: Towards Zero-Shot High-Fidelity 3D Human Texture Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yufei Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junwei Zhu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Junshu Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shijie Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Weijian Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengjie Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yunsheng Wu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">Dongjin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12906v1-abstract-short" style="display: inline;"> Texturing 3D humans with semantic UV maps remains a challenge due to the difficulty of acquiring reasonably unfolded UV. Despite recent text-to-3D advancements in supervising multi-view renderings using large text-to-image (T2I) models, issues persist with generation speed, text consistency, and texture quality, resulting in data scarcity among existing datasets. 
We present TexDreamer, the first z… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12906v1-abstract-full').style.display = 'inline'; document.getElementById('2403.12906v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12906v1-abstract-full" style="display: none;"> Texturing 3D humans with semantic UV maps remains a challenge due to the difficulty of acquiring reasonably unfolded UV. Despite recent text-to-3D advancements in supervising multi-view renderings using large text-to-image (T2I) models, issues persist with generation speed, text consistency, and texture quality, resulting in data scarcity among existing datasets. We present TexDreamer, the first zero-shot multimodal high-fidelity 3D human texture generation model. Utilizing an efficient texture adaptation finetuning strategy, we adapt large T2I model to a semantic UV structure while preserving its original generalization capability. Leveraging a novel feature translator module, the trained model is capable of generating high-fidelity 3D human textures from either text or image within seconds. Furthermore, we introduce ArTicuLated humAn textureS (ATLAS), the largest high-resolution (1024 X 1024) 3D human texture dataset which contains 50k high-fidelity textures with text descriptions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12906v1-abstract-full').style.display = 'none'; document.getElementById('2403.12906v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://ggxxii.github.io/texdreamer/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07954">arXiv:2403.07954</a> <span> [<a href="https://arxiv.org/pdf/2403.07954">pdf</a>, <a href="https://arxiv.org/format/2403.07954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Polynomial Graph Filters: A Novel Adaptive Krylov Subspace Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+K">Keke Huang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wencai Cao</a>, <a href="/search/cs?searchtype=author&query=Ta%2C+H">Hoang Ta</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xiaokui Xiao</a>, <a href="/search/cs?searchtype=author&query=Li%C3%B2%2C+P">Pietro Liò</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07954v2-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs), known as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. 
In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07954v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07954v2-abstract-full" style="display: none;"> Graph Neural Networks (GNNs), known as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. In this paper, we first unify polynomial graph filters, as well as the optimal filters of identical degrees into the Krylov subspace of the same order, thus providing equivalent expressive power theoretically. Next, we investigate the asymptotic convergence property of polynomials from the unified Krylov subspace perspective, revealing their limited adaptability in graphs with varying heterophily degrees. Inspired by those facts, we design a novel adaptive Krylov subspace approach to optimize polynomial bases with provable controllability over the graph spectrum so as to adapt various heterophily graphs. Subsequently, we propose AdaptKry, an optimized polynomial graph filter utilizing bases from the adaptive Krylov subspaces. Meanwhile, in light of the diverse spectral properties of complex graphs, we extend AdaptKry by leveraging multiple adaptive Krylov bases without incurring extra training costs. As a consequence, extended AdaptKry is able to capture the intricate characteristics of graphs and provide insights into their inherent complexity. We conduct extensive experiments across a series of real-world datasets. 
The experimental results demonstrate the superior filtering capability of AdaptKry, as well as the optimized efficacy of the adaptive Krylov basis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'none'; document.getElementById('2403.07954v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04268">arXiv:2403.04268</a> <span> [<a href="https://arxiv.org/pdf/2403.04268">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Qubit-Wise Architecture Search Method for Variational Quantum Circuits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jialin Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zhiqiang Cai</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Ke Xu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Di Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+W">Wei Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.04268v1-abstract-short" style="display: inline;"> Considering the noise level limit, one crucial aspect for quantum machine 
learning is to design a high-performing variational quantum circuit architecture with small number of quantum gates. As the classical neural architecture search (NAS), quantum architecture search methods (QAS) employ methods like reinforcement learning, evolutionary algorithms and supernet optimization to improve the search… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04268v1-abstract-full').style.display = 'inline'; document.getElementById('2403.04268v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.04268v1-abstract-full" style="display: none;"> Considering the noise level limit, one crucial aspect for quantum machine learning is to design a high-performing variational quantum circuit architecture with small number of quantum gates. As the classical neural architecture search (NAS), quantum architecture search methods (QAS) employ methods like reinforcement learning, evolutionary algorithms and supernet optimization to improve the search efficiency. In this paper, we propose a novel qubit-wise architecture search (QWAS) method, which progressively search one-qubit configuration per stage, and combine with Monte Carlo Tree Search algorithm to find good quantum architectures by partitioning the search space into several good and bad subregions. The numerical experimental results indicate that our proposed method can balance the exploration and exploitation of circuit performance and size in some real-world tasks, such as MNIST, Fashion and MOSI. As far as we know, QWAS achieves the state-of-art results of all tasks in the terms of accuracy and circuit size. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04268v1-abstract-full').style.display = 'none'; document.getElementById('2403.04268v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Cao%2C+W&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Cao%2C+W&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 
26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" 
href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository