
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 3,758 results for author: <span class="mathjax">Huang, Z</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Huang, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Huang%2C+Z&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Huang, Z"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14062">arXiv:2411.14062</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14062">pdf</a>, <a href="https://arxiv.org/format/2411.14062">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MMGenBench: Evaluating the Limits of LMMs from the Text-to-Image Generation Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Hailang Huang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yong Wang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zixuan Huang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+H">Huaqiu Li</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+T">Tongwen Huang</a>, <a href="/search/?searchtype=author&amp;query=Chu%2C+X">Xiangxiang Chu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+R">Richong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14062v1-abstract-short" style="display: inline;"> Large Multimodal Models 
(LMMs) have demonstrated remarkable capabilities. While existing benchmarks for evaluating LMMs mainly focus on image comprehension, few works evaluate them from the image generation perspective. To address this issue, we propose a straightforward automated evaluation pipeline. Specifically, this pipeline requires LMMs to generate an image-prompt from a given input image. S&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14062v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14062v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14062v1-abstract-full" style="display: none;"> Large Multimodal Models (LMMs) have demonstrated remarkable capabilities. While existing benchmarks for evaluating LMMs mainly focus on image comprehension, few works evaluate them from the image generation perspective. To address this issue, we propose a straightforward automated evaluation pipeline. Specifically, this pipeline requires LMMs to generate an image-prompt from a given input image. Subsequently, it employs text-to-image generative models to create a new image based on these generated prompts. Finally, we evaluate the performance of LMMs by comparing the original image with the generated one. Furthermore, we introduce MMGenBench-Test, a comprehensive benchmark developed to evaluate LMMs across 13 distinct image patterns, and MMGenBench-Domain, targeting the performance evaluation of LMMs within the generative image domain. A thorough evaluation involving over 50 popular LMMs demonstrates the effectiveness and reliability in both the pipeline and benchmark. Our observations indicate that numerous LMMs excelling in existing benchmarks fail to adequately complete the basic tasks, related to image understanding and description. This finding highlights the substantial potential for performance improvement in current LMMs and suggests avenues for future model optimization. Concurrently, our pipeline facilitates the efficient assessment of LMMs performance across diverse domains by using solely image inputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14062v1-abstract-full').style.display = 'none'; document.getElementById('2411.14062v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
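
The evaluation pipeline described in this abstract (image to image-prompt to regenerated image to comparison) can be summarized in a short sketch. The callables below (`lmm_describe`, `generate_image`, `image_similarity`) are hypothetical placeholders for an LMM, a text-to-image model, and an image-comparison metric; they are not part of the MMGenBench release.

```python
from typing import Any, Callable, Iterable, List

def evaluate_lmm(
    images: Iterable[Any],
    lmm_describe: Callable[[Any], str],             # LMM under test: image -> image-prompt
    generate_image: Callable[[str], Any],           # text-to-image model: prompt -> image
    image_similarity: Callable[[Any, Any], float],  # comparison metric, e.g. a score in [0, 1]
) -> float:
    """Mean similarity between each original image and its regenerated counterpart."""
    scores: List[float] = []
    for original in images:
        prompt = lmm_describe(original)               # step 1: LMM writes an image-prompt
        regenerated = generate_image(prompt)          # step 2: re-create an image from the prompt
        scores.append(image_similarity(original, regenerated))  # step 3: compare the two images
    return sum(scores) / len(scores)
```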
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This project is available at: https://github.com/lerogo/MMGenBench</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13725">arXiv:2411.13725</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13725">pdf</a>, <a href="https://arxiv.org/format/2411.13725">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Renormalization of States and Quasiparticles in Many-body Downfolding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Canestraight%2C+A">Annabelle Canestraight</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhen Huang</a>, <a href="/search/?searchtype=author&amp;query=Vlcek%2C+V">Vojtech Vlcek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13725v1-abstract-short" style="display: inline;"> We explore the principles of many-body Hamiltonian complexity reduction via downfolding on an effective low-dimensional representation. We present a unique measure of fidelity between the effective (reduced-rank) description and the full many-body treatment for arbitrary (i.e., ground and excited) states. When the entire problem is mapped on a system of interacting quasiparticles [npj Computationa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13725v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13725v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13725v1-abstract-full" style="display: none;"> We explore the principles of many-body Hamiltonian complexity reduction via downfolding on an effective low-dimensional representation. We present a unique measure of fidelity between the effective (reduced-rank) description and the full many-body treatment for arbitrary (i.e., ground and excited) states. When the entire problem is mapped on a system of interacting quasiparticles [npj Computational Materials 9 (1), 126, 2023], the effective Hamiltonians can faithfully reproduce the physics only when a clear energy scale separation exists between the subsystems and its environment. We also demonstrate that it is necessary to include quasiparticle renormalization at distinct energy scales, capturing the distinct interaction between subsystems and their surrounding environments. Numerical results from simple, exactly solvable models highlight the limitations and strengths of this approach, particularly for ground and low-lying excited states. This work lays the groundwork for applying dynamical downfolding techniques to problems concerned with (quantum) interfaces. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13725v1-abstract-full').style.display = 'none'; document.getElementById('2411.13725v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13602">arXiv:2411.13602</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13602">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Large-scale cross-modality pretrained model enhances cardiovascular state estimation and cardiomyopathy detection from electrocardiograms: An AI system development and multi-center validation study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ding%2C+Z">Zhengyao Ding</a>, <a href="/search/?searchtype=author&amp;query=Hu%2C+Y">Yujian Hu</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+Y">Youyao Xu</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+C">Chengchen Zhao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Ziyu Li</a>, <a href="/search/?searchtype=author&amp;query=Mao%2C+Y">Yiheng Mao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+H">Haitao Li</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Q">Qian Li</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+J">Jing Wang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yue Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+M">Mengjia Chen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+L">Longbo Wang</a>, <a href="/search/?searchtype=author&amp;query=Chu%2C+X">Xuesen Chu</a>, <a href="/search/?searchtype=author&amp;query=Pan%2C+W">Weichao Pan</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Z">Ziyi Liu</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+F">Fei Wu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+H">Hongkun Zhang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+T">Ting Chen</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhengxing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13602v1-abstract-short" style="display: inline;"> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. 
This study introduces CardiacNets, an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13602v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13602v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13602v1-abstract-full" style="display: none;"> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an innovative model that enhances ECG analysis by leveraging the diagnostic strengths of CMR through cross-modal contrastive learning and generative pretraining. CardiacNets serves two primary functions: (1) it evaluates detailed cardiac function indicators and screens for potential CVDs, including coronary artery disease, cardiomyopathy, pericarditis, heart failure and pulmonary hypertension, using ECG input; and (2) it enhances interpretability by generating high-quality CMR images from ECG data. We train and validate the proposed CardiacNets on two large-scale public datasets (the UK Biobank with 41,519 individuals and the MIMIC-IV-ECG comprising 501,172 samples) as well as three private datasets (FAHZU with 410 individuals, SAHZU with 464 individuals, and QPH with 338 individuals), and the findings demonstrate that CardiacNets consistently outperforms traditional ECG-only models, substantially improving screening accuracy. Furthermore, the generated CMR images provide valuable diagnostic support for physicians of all experience levels. This proof-of-concept study highlights how ECG can facilitate cross-modal insights into cardiac function assessment, paving the way for enhanced CVD screening and diagnosis at a population level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13602v1-abstract-full').style.display = 'none'; document.getElementById('2411.13602v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
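
The "cross-modal contrastive learning" component mentioned in the abstract is not specified here in detail; a generic CLIP-style symmetric InfoNCE objective between paired ECG and CMR embeddings, as sketched below, is one plausible reading. The encoder architectures and the generative-pretraining branch are omitted, and all names are illustrative rather than taken from the paper.

```python
import torch
import torch.nn.functional as F

def cross_modal_contrastive_loss(ecg_emb: torch.Tensor,
                                 cmr_emb: torch.Tensor,
                                 temperature: float = 0.07) -> torch.Tensor:
    """ecg_emb, cmr_emb: (batch, dim) embeddings of paired ECG/CMR samples."""
    ecg = F.normalize(ecg_emb, dim=-1)
    cmr = F.normalize(cmr_emb, dim=-1)
    logits = ecg @ cmr.t() / temperature                      # pairwise similarity matrix
    targets = torch.arange(ecg.size(0), device=ecg.device)    # matched pairs sit on the diagonal
    # symmetric loss: ECG -> CMR retrieval and CMR -> ECG retrieval
    return 0.5 * (F.cross_entropy(logits, targets) + F.cross_entropy(logits.t(), targets))

# usage with random stand-in embeddings
loss = cross_modal_contrastive_loss(torch.randn(8, 256), torch.randn(8, 256))
```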
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13503">arXiv:2411.13503</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13503">pdf</a>, <a href="https://arxiv.org/format/2411.13503">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VBench++: Comprehensive and Versatile Benchmark Suite for Video Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Ziqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+F">Fan Zhang</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+X">Xiaojie Xu</a>, <a href="/search/?searchtype=author&amp;query=He%2C+Y">Yinan He</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+J">Jiashuo Yu</a>, <a href="/search/?searchtype=author&amp;query=Dong%2C+Z">Ziyue Dong</a>, <a href="/search/?searchtype=author&amp;query=Ma%2C+Q">Qianli Ma</a>, <a href="/search/?searchtype=author&amp;query=Chanpaisit%2C+N">Nattapol Chanpaisit</a>, <a href="/search/?searchtype=author&amp;query=Si%2C+C">Chenyang Si</a>, <a href="/search/?searchtype=author&amp;query=Jiang%2C+Y">Yuming Jiang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yaohui Wang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xinyuan Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Ying-Cong Chen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+L">Limin Wang</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+D">Dahua Lin</a>, <a href="/search/?searchtype=author&amp;query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Z">Ziwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13503v1-abstract-short" style="display: inline;"> Video generation has witnessed significant advancements, yet evaluating these models remains a challenge. A comprehensive evaluation benchmark for video generation is indispensable for two reasons: 1) Existing metrics do not fully align with human perceptions; 2) An ideal evaluation system should provide insights to inform future developments of video generation. To this end, we present VBench, a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13503v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13503v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13503v1-abstract-full" style="display: none;"> Video generation has witnessed significant advancements, yet evaluating these models remains a challenge. A comprehensive evaluation benchmark for video generation is indispensable for two reasons: 1) Existing metrics do not fully align with human perceptions; 2) An ideal evaluation system should provide insights to inform future developments of video generation. 
To this end, we present VBench, a comprehensive benchmark suite that dissects &#34;video generation quality&#34; into specific, hierarchical, and disentangled dimensions, each with tailored prompts and evaluation methods. VBench has several appealing properties: 1) Comprehensive Dimensions: VBench comprises 16 dimensions in video generation (e.g., subject identity inconsistency, motion smoothness, temporal flickering, and spatial relationship, etc). The evaluation metrics with fine-grained levels reveal individual models&#39; strengths and weaknesses. 2) Human Alignment: We also provide a dataset of human preference annotations to validate our benchmarks&#39; alignment with human perception, for each evaluation dimension respectively. 3) Valuable Insights: We look into current models&#39; ability across various evaluation dimensions, and various content types. We also investigate the gaps between video and image generation models. 4) Versatile Benchmarking: VBench++ supports evaluating text-to-video and image-to-video. We introduce a high-quality Image Suite with an adaptive aspect ratio to enable fair evaluations across different image-to-video generation settings. Beyond assessing technical quality, VBench++ evaluates the trustworthiness of video generative models, providing a more holistic view of model performance. 5) Full Open-Sourcing: We fully open-source VBench++ and continually add new video generation models to our leaderboard to drive forward the field of video generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13503v1-abstract-full').style.display = 'none'; document.getElementById('2411.13503v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Leaderboard: https://huggingface.co/spaces/Vchitect/VBench_Leaderboard Code: https://github.com/Vchitect/VBench Project page: https://vchitect.github.io/VBench-project/ extension of arXiv:2311.17982. 
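
The abstract describes a per-dimension evaluation design: each quality dimension has its own tailored prompts and metric, and a model is summarized by its per-dimension scores. A minimal sketch of that pattern, with placeholder evaluators rather than the actual VBench++ metrics:

```python
from typing import Any, Callable, Dict, List

Evaluator = Callable[[List[Any]], float]   # maps sampled videos for a dimension to a score in [0, 1]

def evaluate_model(videos_per_dimension: Dict[str, List[Any]],
                   evaluators: Dict[str, Evaluator]) -> Dict[str, float]:
    """Score one video generator on every registered quality dimension."""
    return {dim: evaluators[dim](videos)
            for dim, videos in videos_per_dimension.items()}

# e.g. evaluators = {"motion_smoothness": smoothness_metric,
#                    "temporal_flickering": flicker_metric, ...}
```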

5. arXiv:2411.12364 (https://arxiv.org/abs/2411.12364) [pdf, other]
   Categories: cs.LG (Machine Learning)
   Ultra-Sparse Memory Network
   Authors: Zihao Huang, Qiyang Min, Hongzhi Huang, Defa Zhu, Yutao Zeng, Ran Guo, Xun Zhou
   Abstract: It is widely acknowledged that the performance of Transformer models is exponentially related to their number of parameters and computational complexity. While approaches like Mixture of Experts (MoE) decouple parameter count from computational complexity, they still face challenges in inference due to high memory access costs. This work introduces UltraMem, incorporating a large-scale, ultra-sparse memory layer to address these limitations. Our approach significantly reduces inference latency while maintaining model performance. We also investigate the scaling laws of this new architecture, demonstrating that it not only exhibits favorable scaling properties but outperforms traditional models. In our experiments, we train networks with up to 20 million memory slots. The results show that our method achieves state-of-the-art inference speed and model performance within a given computational budget.
   Submitted 19 November, 2024; originally announced November 2024.
   Comments: 10 pages, 6 figures
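
The abstract does not detail the memory-layer design, so the following is only a generic illustration of a large, sparsely accessed memory: a big table of value slots of which just k are read per input, in the spirit of keyed memory layers rather than UltraMem's actual architecture.

```python
import torch
import torch.nn as nn

class SparseMemoryLayer(nn.Module):
    """Generic top-k sparse memory lookup over a very large slot table (illustrative only)."""

    def __init__(self, num_slots: int, dim: int, k: int = 8):
        super().__init__()
        self.keys = nn.Parameter(torch.randn(num_slots, dim) * dim ** -0.5)
        self.values = nn.Embedding(num_slots, dim)   # large parameter store, sparsely read
        self.k = k

    def forward(self, x: torch.Tensor) -> torch.Tensor:           # x: (batch, dim)
        scores = x @ self.keys.t()                                 # (batch, num_slots)
        weights, idx = scores.topk(self.k, dim=-1)                 # activate only k slots per input
        weights = torch.softmax(weights, dim=-1)
        return (weights.unsqueeze(-1) * self.values(idx)).sum(dim=1)  # weighted read of k values

mem = SparseMemoryLayer(num_slots=65536, dim=128)
out = mem(torch.randn(4, 128))   # only 8 of the 65,536 slots are touched per input
```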
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12301">arXiv:2411.12301</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12301">pdf</a>, <a href="https://arxiv.org/format/2411.12301">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Physics-Guided Detector for SAR Airplanes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhongling Huang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+L">Long Liu</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+S">Shuxin Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Z">Zhirui Wang</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+G">Gong Cheng</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+J">Junwei Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12301v1-abstract-short" style="display: inline;"> The disperse structure distributions (discreteness) and variant scattering characteristics (variability) of SAR airplane targets lead to special challenges of object detection and recognition. The current deep learning-based detectors encounter challenges in distinguishing fine-grained SAR airplanes against complex backgrounds. To address it, we propose a novel physics-guided detector (PGD) learni&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12301v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12301v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12301v1-abstract-full" style="display: none;"> The disperse structure distributions (discreteness) and variant scattering characteristics (variability) of SAR airplane targets lead to special challenges of object detection and recognition. The current deep learning-based detectors encounter challenges in distinguishing fine-grained SAR airplanes against complex backgrounds. To address it, we propose a novel physics-guided detector (PGD) learning paradigm for SAR airplanes that comprehensively investigate their discreteness and variability to improve the detection performance. It is a general learning paradigm that can be extended to different existing deep learning-based detectors with &#34;backbone-neck-head&#34; architectures. The main contributions of PGD include the physics-guided self-supervised learning, feature enhancement, and instance perception, denoted as PGSSL, PGFE, and PGIP, respectively. PGSSL aims to construct a self-supervised learning task based on a wide range of SAR airplane targets that encodes the prior knowledge of various discrete structure distributions into the embedded space. Then, PGFE enhances the multi-scale feature representation of a detector, guided by the physics-aware information learned from PGSSL. 
PGIP is constructed at the detection head to learn the refined and dominant scattering point of each SAR airplane instance, thus alleviating the interference from the complex background. We propose two implementations, denoted as PGD and PGD-Lite, and apply them to various existing detectors with different backbones and detection heads. The experiments demonstrate the flexibility and effectiveness of the proposed PGD, which can improve existing detectors on SAR airplane detection with fine-grained classification task (an improvement of 3.1\% mAP most), and achieve the state-of-the-art performance (90.7\% mAP) on SAR-AIRcraft-1.0 dataset. The project is open-source at \url{https://github.com/XAI4SAR/PGD}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12301v1-abstract-full').style.display = 'none'; document.getElementById('2411.12301v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11329">arXiv:2411.11329</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11329">pdf</a>, <a href="https://arxiv.org/format/2411.11329">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Color-Oriented Redundancy Reduction in Dataset Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yuan%2C+B">Bowen Yuan</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Z">Zijian Wang</a>, <a href="/search/?searchtype=author&amp;query=Luo%2C+Y">Yadan Luo</a>, <a href="/search/?searchtype=author&amp;query=Baktashmotlagh%2C+M">Mahsa Baktashmotlagh</a>, <a href="/search/?searchtype=author&amp;query=Luo%2C+Y">Yadan Luo</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zi Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11329v1-abstract-short" style="display: inline;"> Dataset Distillation (DD) is designed to generate condensed representations of extensive image datasets, enhancing training efficiency. Despite recent advances, there remains considerable potential for improvement, particularly in addressing the notable redundancy within the color space of distilled images. In this paper, we propose AutoPalette, a framework that minimizes color redundancy at the i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11329v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11329v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11329v1-abstract-full" style="display: none;"> Dataset Distillation (DD) is designed to generate condensed representations of extensive image datasets, enhancing training efficiency. 
Despite recent advances, there remains considerable potential for improvement, particularly in addressing the notable redundancy within the color space of distilled images. In this paper, we propose AutoPalette, a framework that minimizes color redundancy at the individual image and overall dataset levels, respectively. At the image level, we employ a palette network, a specialized neural network, to dynamically allocate colors from a reduced color space to each pixel. The palette network identifies essential areas in synthetic images for model training and consequently assigns more unique colors to them. At the dataset level, we develop a color-guided initialization strategy to minimize redundancy among images. Representative images with the least replicated color patterns are selected based on the information gain. A comprehensive performance study involving various datasets and evaluation scenarios is conducted, demonstrating the superior performance of our proposed color-aware DD compared to existing DD methods. The code is available at \url{https://github.com/KeViNYuAn0314/AutoPalette}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11329v1-abstract-full').style.display = 'none'; document.getElementById('2411.11329v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38th Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11223">arXiv:2411.11223</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11223">pdf</a>, <a href="https://arxiv.org/format/2411.11223">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Transfer Learning for Video-language Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+H">Haoxing Chen</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zizheng Huang</a>, <a href="/search/?searchtype=author&amp;query=Hong%2C+Y">Yan Hong</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yanshuo Wang</a>, <a href="/search/?searchtype=author&amp;query=Lyu%2C+Z">Zhongcai Lyu</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+Z">Zhuoer Xu</a>, <a href="/search/?searchtype=author&amp;query=Lan%2C+J">Jun Lan</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+Z">Zhangxuan Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11223v1-abstract-short" style="display: inline;"> Pre-trained vision-language models provide a robust foundation for efficient transfer learning across various downstream tasks. In the field of video action recognition, mainstream approaches often introduce additional parameter modules to capture temporal information. 
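
As a rough illustration of the two ideas in the abstract (assigning pixels to a reduced palette, and preferring images whose color usage carries the most information), the sketch below uses a fixed uniform quantization and a palette-entropy score; the learned palette network and the exact information-gain criterion of AutoPalette are not reproduced.

```python
import numpy as np

def quantize_to_palette(img: np.ndarray, bits_per_channel: int = 2) -> np.ndarray:
    """img: (H, W, 3) uint8 array -> per-pixel palette indices in [0, levels**3)."""
    levels = 2 ** bits_per_channel
    shift = 8 - bits_per_channel
    c = (img >> shift).astype(np.int64)                 # reduce each RGB channel to `levels` bins
    return (c[..., 0] * levels + c[..., 1]) * levels + c[..., 2]

def color_entropy(palette_idx: np.ndarray) -> float:
    """Shannon entropy of palette usage; higher means less replicated color patterns."""
    counts = np.bincount(palette_idx.ravel())
    p = counts[counts > 0] / counts.sum()
    return float(-(p * np.log2(p)).sum())

def select_representatives(images, n: int):
    """Stand-in for color-guided initialization: keep the n most color-informative images."""
    ranked = sorted(images, key=lambda im: color_entropy(quantize_to_palette(im)), reverse=True)
    return ranked[:n]
```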

8. arXiv:2411.11223 (https://arxiv.org/abs/2411.11223) [pdf, other]
   Categories: cs.CV (Computer Vision and Pattern Recognition)
   Efficient Transfer Learning for Video-language Foundation Models
   Authors: Haoxing Chen, Zizheng Huang, Yan Hong, Yanshuo Wang, Zhongcai Lyu, Zhuoer Xu, Jun Lan, Zhangxuan Gu
   Abstract: Pre-trained vision-language models provide a robust foundation for efficient transfer learning across various downstream tasks. In the field of video action recognition, mainstream approaches often introduce additional parameter modules to capture temporal information. While the increased model capacity brought by these additional parameters helps better fit the video-specific inductive biases, existing methods require learning a large number of parameters and are prone to catastrophic forgetting of the original generalizable knowledge. In this paper, we propose a simple yet effective Multi-modal Spatio-Temporal Adapter (MSTA) to improve the alignment between representations in the text and vision branches, achieving a balance between general knowledge and task-specific knowledge. Furthermore, to mitigate over-fitting and enhance generalizability, we introduce a spatio-temporal description-guided consistency constraint. This constraint involves feeding template inputs (i.e., "a video of {cls}") into the trainable language branch, while LLM-generated spatio-temporal descriptions are input into the pre-trained language branch, enforcing consistency between the outputs of the two branches. This mechanism prevents over-fitting to downstream tasks and improves the distinguishability of the trainable branch within the spatio-temporal semantic space. We evaluate the effectiveness of our approach across four tasks: zero-shot transfer, few-shot learning, base-to-novel generalization, and fully-supervised learning. Compared to many state-of-the-art methods, our MSTA achieves outstanding performance across all evaluations, while using only 2-7% of the trainable parameters in the original model. Code will be available at https://github.com/chenhaoxing/ETL4Video.
   Submitted 17 November, 2024; originally announced November 2024.
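
The description-guided consistency constraint can be sketched as follows: the template prompt goes through the trainable text branch, the LLM-generated description goes through the frozen pre-trained branch, and the two outputs are pulled together. The cosine distance and the encoder interfaces below are assumptions, not necessarily the paper's choices.

```python
import torch
import torch.nn.functional as F

def consistency_loss(trainable_text_encoder, frozen_text_encoder,
                     template_tokens: torch.Tensor,
                     description_tokens: torch.Tensor) -> torch.Tensor:
    """Pull the trainable branch's output toward the frozen branch's output."""
    z_template = trainable_text_encoder(template_tokens)         # e.g. tokens of "a video of {cls}"
    with torch.no_grad():                                         # frozen branch provides the target
        z_description = frozen_text_encoder(description_tokens)  # LLM-generated description tokens
    return 1.0 - F.cosine_similarity(z_template, z_description, dim=-1).mean()
```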

9. arXiv:2411.09615 (https://arxiv.org/abs/2411.09615) [pdf, other]
   Categories: cond-mat.soft (Soft Condensed Matter), cond-mat.stat-mech (Statistical Mechanics), physics.bio-ph (Biological Physics)
   Noise-driven odd elastic waves in living chiral active matter
   Authors: Sang Hyun Choi, Zhi-Feng Huang, Nigel Goldenfeld
   Abstract: Chiral active matter is predicted to exhibit odd elasticity, with nontraditional elastic response arising from a combination of chirality, being out of equilibrium, and the presence of nonreciprocal interactions. One of the resulting phenomena is the possible occurrence of odd elastic waves in overdamped systems, although its experimental realization still remains elusive. Here we show that in overdamped active systems, noise is required to generate persistent elastic waves. In the chiral crystalline phase of active matter, such as that found recently in populations of swimming starfish embryos, the noise arises from self-driving of active particles and their mutual collisions, a key factor that has been missing in previous studies. We identify the criterion for the occurrence of noise-driven odd elastic waves, and postulate the corresponding phase diagram for general chiral active crystals. Our results can be used to predict the experimental conditions for achieving a transition to self-sustained elastic waves in overdamped active systems.
   Submitted 14 November, 2024; originally announced November 2024.

10. arXiv:2411.09540 (https://arxiv.org/abs/2411.09540) [pdf, other]
    Categories: cs.CV (Computer Vision and Pattern Recognition), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
    Prompting the Unseen: Detecting Hidden Backdoors in Black-Box Models
    Authors: Zi-Xuan Huang, Jia-Wei Chen, Zhi-Peng Zhang, Chia-Mu Yu
    Abstract: Visual prompting (VP) is a new technique that adapts well-trained frozen models for source domain tasks to target domain tasks. This study examines VP's benefits for black-box model-level backdoor detection. The visual prompt in VP maps class subspaces between source and target domains. We identify a misalignment, termed class subspace inconsistency, between clean and poisoned datasets. Based on this, we introduce BProm, a black-box model-level detection method to identify backdoors in suspicious models, if any. BProm leverages the low classification accuracy of prompted models when backdoors are present. Extensive experiments confirm BProm's effectiveness.
    Submitted 14 November, 2024; originally announced November 2024.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09540v1-abstract-full').style.display = 'none'; document.getElementById('2411.09540v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09007">arXiv:2411.09007</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09007">pdf</a>, <a href="https://arxiv.org/format/2411.09007">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scale Contrastive Learning with Selective Attentions for Blind Image Quality Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zihao Huang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+X">Xudong Li</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+B">Bohan Fu</a>, <a href="/search/?searchtype=author&amp;query=Chu%2C+X">Xiaohui Chu</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+K">Ke Li</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+Y">Yunhang Shen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09007v1-abstract-short" style="display: inline;"> Blind image quality assessment (BIQA) serves as a fundamental task in computer vision, yet it often fails to consistently align with human subjective perception. Recent advances show that multi-scale evaluation strategies are promising due to their ability to replicate the hierarchical structure of human vision. However, the effectiveness of these strategies is limited by a lack of understanding o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09007v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09007v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09007v1-abstract-full" style="display: none;"> Blind image quality assessment (BIQA) serves as a fundamental task in computer vision, yet it often fails to consistently align with human subjective perception. Recent advances show that multi-scale evaluation strategies are promising due to their ability to replicate the hierarchical structure of human vision. However, the effectiveness of these strategies is limited by a lack of understanding of how different image scales influence perceived quality. This paper addresses two primary challenges: the significant redundancy of information across different scales, and the confusion caused by combining features from these scales, which may vary widely in quality. To this end, a new multi-scale BIQA framework is proposed, namely Contrast-Constrained Scale-Focused IQA Framework (CSFIQA). 

arXiv:2411.08741 (https://arxiv.org/abs/2411.08741) [quant-ph, math-ph]
Unified analysis of non-Markovian open quantum systems in Gaussian environment using superoperator formalism
Authors: Zhen Huang, Lin Lin, Gunhee Park, Yuanran Zhu
Abstract: We present perturbative error bounds for the non-Markovian dynamics of observables in open quantum systems interacting with Gaussian environments, governed by general Liouville dynamics.
This extends the work of [Mascherpa et al., Phys. Rev. Lett. 118, 100401, 2017], which demonstrated qualitatively tighter bounds over the standard Grönwall-type analysis, where the joint system-environment evolution is unitary. Our results apply to systems with both bosonic and fermionic environments. Our approach utilizes a superoperator formalism, which avoids the need for formal coherent state path integral calculations, or the dilation of Lindblad dynamics into an equivalent unitary framework with infinitely many degrees of freedom. This enables a unified treatment of a wide range of open quantum systems. These findings provide a solid theoretical basis for various recently developed pseudomode methods in simulating open quantum system dynamics.
Submitted 13 November, 2024; originally announced November 2024.
Comments: 46 pages

arXiv:2411.08570 (https://arxiv.org/abs/2411.08570) [eess.SP]
Electromagnetic Modeling and Capacity Analysis of Rydberg Atom-Based MIMO System
Authors: Shuai S. A. Yuan, Xinyi Y. I. Xu, Jinpeng Yuan, Guoda Xie, Chongwen Huang, Xiaoming Chen, Zhixiang Huang, Wei E. I. Sha
Abstract: Rydberg atom-based antennas exploit the quantum properties of highly excited Rydberg atoms, providing unique advantages over classical antennas, such as high sensitivity, broad frequency range, and compact size.
Despite the increasing interest in their applications in antenna and communication engineering, two key properties, namely the lack of polarization multiplexing and isotropic reception without mutual coupling, remain unexplored in the analysis of Rydberg atom-based spatial multiplexing, i.e., multiple-input and multiple-output (MIMO), communications. Generally, the design considerations for any antenna, even an atomic one, can be reduced to factors such as radiation patterns, efficiency, and polarization, allowing them to be seamlessly integrated into existing system models. In this letter, we extract the antenna properties from the relevant quantum characteristics, enabling electromagnetic modeling and capacity analysis of Rydberg MIMO systems in both far-field and near-field scenarios. By employing a ray-based method for far-field analysis and the dyadic Green's function for near-field calculation, our results indicate that Rydberg atom-based antenna arrays offer specific advantages over classical dipole-type arrays in single-polarization MIMO communications.
Submitted 13 November, 2024; originally announced November 2024.
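
For context, the figure of merit such a capacity analysis compares across antenna arrays is the standard MIMO channel capacity with equal power allocation, C = log2 det(I + (SNR/Nt) H H^H). The snippet below evaluates it for a random i.i.d. channel; the channel matrix and SNR are placeholders, not the Rydberg-array model of the paper.

    # Equal-power MIMO capacity for a sample channel realization.
    import numpy as np

    def mimo_capacity_bits(H, snr_linear):
        """C = log2 det(I + (SNR/Nt) H H^H) in bit/s/Hz."""
        nr, nt = H.shape
        gram = H @ H.conj().T
        eig = np.linalg.eigvalsh(np.eye(nr) + (snr_linear / nt) * gram)  # Hermitian, real eigenvalues
        return float(np.sum(np.log2(eig)))

    rng = np.random.default_rng(1)
    H = (rng.normal(size=(4, 4)) + 1j * rng.normal(size=(4, 4))) / np.sqrt(2)   # i.i.d. Rayleigh channel
    print(f"capacity at 10 dB SNR = {mimo_capacity_bits(H, 10.0):.2f} bit/s/Hz")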

arXiv:2411.08284 (https://arxiv.org/abs/2411.08284) [cs.IT, math.NA]
Dynamic Thresholding Algorithm with Memory for Linear Inverse Problems
Authors: Zhong-Feng Sun, Yun-Bin Zhao, Jin-Chuan Zhou, Zheng-Hai Huang
Abstract: The relaxed optimal $k$-thresholding pursuit (ROTP) is a recent algorithm for linear inverse problems. This algorithm is based on the optimal $k$-thresholding technique, which performs vector thresholding and error metric reduction simultaneously. Although ROTP can be used to solve small to medium-sized linear inverse problems, its computational cost is high when solving large-scale problems. By merging the optimal $k$-thresholding technique with an iterative method with memory as well as optimization with sparse search directions, we propose the so-called dynamic thresholding algorithm with memory (DTAM), which iteratively and dynamically selects vector bases to construct the problem solution. At every step, the algorithm uses more than one (and possibly all) of the iterates generated so far to construct a new search direction, and it solves only small-sized quadratic subproblems at every iteration. Thus the computational complexity of DTAM is remarkably lower than that of ROTP-type methods. It turns out that DTAM can locate the solution of linear inverse problems if the matrix involved satisfies the restricted isometry property. Experiments on synthetic data, audio signal reconstruction and image denoising demonstrate that the proposed algorithm performs comparably to several mainstream thresholding and greedy algorithms, and it works much faster than ROTP-type algorithms, especially when the sparsity level of the signal is relatively low.
Submitted 12 November, 2024; originally announced November 2024.
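
To make the thresholding-pursuit family concrete, here is a generic iterative hard-thresholding loop with a simple momentum ("memory") term for y = Ax with k-sparse x. It is only an illustration under toy assumptions, not the DTAM algorithm itself (no optimal k-thresholding step and no quadratic subproblems).

    # Generic iterative hard thresholding with a momentum-style memory term.
    import numpy as np

    def hard_threshold(v, k):
        """Keep the k largest-magnitude entries of v and zero the rest."""
        out = np.zeros_like(v)
        keep = np.argsort(np.abs(v))[-k:]
        out[keep] = v[keep]
        return out

    def iht_with_memory(A, y, k, beta=0.3, iters=300):
        L = np.linalg.norm(A, 2) ** 2          # Lipschitz constant of the least-squares gradient
        x = np.zeros(A.shape[1])
        step_prev = np.zeros_like(x)
        for _ in range(iters):
            grad = A.T @ (y - A @ x)
            step = grad / L + beta * step_prev  # reuse the previous direction as "memory"
            x = hard_threshold(x + step, k)
            step_prev = step
        return x

    rng = np.random.default_rng(0)
    A = rng.normal(size=(60, 200)) / np.sqrt(60)
    x_true = np.zeros(200)
    x_true[rng.choice(200, 5, replace=False)] = rng.normal(size=5)
    x_hat = iht_with_memory(A, A @ x_true, k=5)
    print("relative recovery error:", np.linalg.norm(x_hat - x_true) / np.linalg.norm(x_true))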
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08284v1-abstract-full').style.display = 'none'; document.getElementById('2411.08284v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08210">arXiv:2411.08210</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08210">pdf</a>, <a href="https://arxiv.org/format/2411.08210">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> BOSON$^{-1}$: Understanding and Enabling Physically-Robust Photonic Inverse Design with Adaptive Variation-Aware Subspace Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ma%2C+P">Pingchuan Ma</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhengqi Gao</a>, <a href="/search/?searchtype=author&amp;query=Begovic%2C+A">Amir Begovic</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+M">Meng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Haoyu Yang</a>, <a href="/search/?searchtype=author&amp;query=Ren%2C+H">Haoxing Ren</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z+R">Zhaoran Rena Huang</a>, <a href="/search/?searchtype=author&amp;query=Boning%2C+D">Duane Boning</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+J">Jiaqi Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08210v1-abstract-short" style="display: inline;"> Nanophotonic device design aims to optimize photonic structures to meet specific requirements across various applications. Inverse design has unlocked non-intuitive, high-dimensional design spaces, enabling the discovery of high-performance devices beyond heuristic or analytic methods. The adjoint method, which calculates gradients for all variables using just two simulations, enables efficient na&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08210v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08210v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08210v1-abstract-full" style="display: none;"> Nanophotonic device design aims to optimize photonic structures to meet specific requirements across various applications. Inverse design has unlocked non-intuitive, high-dimensional design spaces, enabling the discovery of high-performance devices beyond heuristic or analytic methods. The adjoint method, which calculates gradients for all variables using just two simulations, enables efficient navigation of this complex space. 
However, many inverse-designed structures, while numerically plausible, are difficult to fabricate and sensitive to variations, limiting their practical use. The discrete nature of the problem, with numerous locally optimal structures, also poses significant optimization challenges, often causing gradient-based methods to converge on suboptimal designs. In this work, we formulate inverse design as a fabrication-restricted, discrete, probabilistic optimization problem and introduce BOSON$^{-1}$, an end-to-end, variation-aware subspace optimization framework to address the challenges of manufacturability, robustness, and optimizability. To overcome optimization difficulty, we propose dense target-enhanced gradient flows to mitigate misleading local optima and introduce a conditional subspace optimization strategy to create high-dimensional tunnels to escape local optima. Furthermore, we significantly reduce the runtime associated with optimizing across exponentially many variation samples through adaptive sampling-based robust optimization, ensuring both efficiency and variation robustness. On three representative photonic device benchmarks, our proposed inverse design methodology BOSON$^{-1}$ delivers fabricable structures and achieves the best convergence and performance under realistic variations, outperforming prior art with 74.3% post-fabrication performance. We open-source our codes at https://github.com/ScopeX-ASU/BOSON.
Submitted 12 November, 2024; originally announced November 2024.
Comments: 7 pages. Accepted IEEE DATE 2025
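
The sampling-based robust optimization mentioned above boils down to optimizing the expected figure of merit over sampled fabrication-like variations. The sketch below shows that basic idea on a toy objective with made-up perturbation statistics; it is not the BOSON$^{-1}$ framework (no adjoint simulations, subspace parameterization, or adaptive sampler).

    # Sampling-based robust optimization of a toy objective under random perturbations.
    import numpy as np

    def objective(design):
        """Toy stand-in for a simulated figure of merit (lower is better)."""
        return np.sum((design - 0.7) ** 2) + 0.1 * np.sum(np.sin(8 * design) ** 2)

    def grad(design, eps=1e-5):
        """Central finite-difference gradient of the toy objective."""
        g = np.zeros_like(design)
        for i in range(design.size):
            d = np.zeros_like(design)
            d[i] = eps
            g[i] = (objective(design + d) - objective(design - d)) / (2 * eps)
        return g

    def robust_step(design, rng, lr=0.05, sigma=0.05, n_samples=16):
        """Average the gradient over sampled variations of the design variables."""
        g = np.mean([grad(np.clip(design + rng.normal(scale=sigma, size=design.shape), 0, 1))
                     for _ in range(n_samples)], axis=0)
        return np.clip(design - lr * g, 0, 1)      # keep the design in its feasible box

    design = np.full(8, 0.5)
    rng = np.random.default_rng(1)
    for _ in range(100):
        design = robust_step(design, rng)
    print("robust design:", np.round(design, 3))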

arXiv:2411.08189 (https://arxiv.org/abs/2411.08189) [astro-ph.HE]
High-Statistics Measurement of the Cosmic-Ray Electron Spectrum with H.E.S.S.
Authors: F. Aharonian, F. Ait Benkhali, J. Aschersleben, H. Ashkar, M. Backes, V. Barbosa Martins, R. Batzofin, Y. Becherini, D. Berge, K. Bernlöhr, B. Bi, M. Böttcher, C. Boisson, J. Bolmont, M. de Bony de Lavergne, J. Borowska, M. Bouyahiaoui, R. Brose, A. Brown, F. Brun, B. Bruno, T. Bulik, C. Burger-Scheidlin, T. Bylund, S. Casanova, et al. (123 additional authors not shown)
Abstract: Owing to their rapid cooling rate and hence loss-limited propagation distance, cosmic-ray electrons and positrons (CRe) at very high energies probe local cosmic-ray accelerators and provide constraints on exotic production mechanisms such as annihilation of dark matter particles. We present a high-statistics measurement of the spectrum of CRe candidate events from 0.3 to 40 TeV with the High Energy Stereoscopic System (H.E.S.S.), covering two orders of magnitude in energy and reaching a proton rejection power of better than $10^{4}$. The measured spectrum is well described by a broken power law, with a break around 1 TeV, where the spectral index increases from $\Gamma_1 = 3.25 \pm 0.02\,(\mathrm{stat}) \pm 0.2\,(\mathrm{sys})$ to $\Gamma_2 = 4.49 \pm 0.04\,(\mathrm{stat}) \pm 0.2\,(\mathrm{sys})$. Apart from the break, the spectrum is featureless. The absence of distinct signatures at multi-TeV energies imposes constraints on the presence of nearby CRe accelerators and the local CRe propagation mechanisms.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08189v1-abstract-full').style.display = 'none'; document.getElementById('2411.08189v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">main paper: 8 pages, 4 figures, supplemental material: 12 pages, 14 figures, accepted for publication in Physical Review Letters https://journals.aps.org/prl/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07970">arXiv:2411.07970</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07970">pdf</a>, <a href="https://arxiv.org/format/2411.07970">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> </div> </div> <p class="title is-5 mathjax"> MUltiplexed Survey Telescope: Perspectives for Large-Scale Structure Cosmology in the Era of Stage-V Spectroscopic Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhao%2C+C">Cheng Zhao</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+S">Song Huang</a>, <a href="/search/?searchtype=author&amp;query=He%2C+M">Mengfan He</a>, <a href="/search/?searchtype=author&amp;query=Montero-Camacho%2C+P">Paulo Montero-Camacho</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Y">Yu Liu</a>, <a href="/search/?searchtype=author&amp;query=Renard%2C+P">Pablo Renard</a>, <a href="/search/?searchtype=author&amp;query=Tang%2C+Y">Yunyi Tang</a>, <a href="/search/?searchtype=author&amp;query=Verdier%2C+A">Aurelien Verdier</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+W">Wenshuo Xu</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+X">Xiaorui Yang</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+J">Jiaxi Yu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yao Zhang</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+S">Siyi Zhao</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+X">Xingchen Zhou</a>, <a href="/search/?searchtype=author&amp;query=He%2C+S">Shengyu He</a>, <a href="/search/?searchtype=author&amp;query=Kneib%2C+J">Jean-Paul Kneib</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jiayi Li</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zhuoyang Li</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+W">Wen-Ting Wang</a>, <a href="/search/?searchtype=author&amp;query=Xianyu%2C+Z">Zhong-Zhi Xianyu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yidian Zhang</a>, <a href="/search/?searchtype=author&amp;query=Gsponer%2C+R">Rafaela Gsponer</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+X">Xiao-Dong Li</a>, <a href="/search/?searchtype=author&amp;query=Rocher%2C+A">Antoine Rocher</a>, <a href="/search/?searchtype=author&amp;query=Zou%2C+S">Siwei Zou</a> , et al. 

arXiv:2411.07970 (https://arxiv.org/abs/2411.07970) [astro-ph.CO, astro-ph.IM]
MUltiplexed Survey Telescope: Perspectives for Large-Scale Structure Cosmology in the Era of Stage-V Spectroscopic Survey
Authors: Cheng Zhao, Song Huang, Mengfan He, Paulo Montero-Camacho, Yu Liu, Pablo Renard, Yunyi Tang, Aurelien Verdier, Wenshuo Xu, Xiaorui Yang, Jiaxi Yu, Yao Zhang, Siyi Zhao, Xingchen Zhou, Shengyu He, Jean-Paul Kneib, Jiayi Li, Zhuoyang Li, Wen-Ting Wang, Zhong-Zhi Xianyu, Yidian Zhang, Rafaela Gsponer, Xiao-Dong Li, Antoine Rocher, Siwei Zou, et al. (18 additional authors not shown)
Abstract: The MUltiplexed Survey Telescope (MUST) is a 6.5-meter telescope under development. Dedicated to highly multiplexed, wide-field spectroscopic surveys, MUST observes over 20,000 targets simultaneously using 6.2-mm pitch positioning robots within a ~5 deg2 field of view. MUST aims to carry out the first Stage-V spectroscopic survey in the 2030s to map the 3D Universe with over 100 million galaxies and quasars, spanning from the nearby Universe to redshift z~5.5, corresponding to around 1 billion years after the Big Bang. To cover this extensive redshift range, we present an initial conceptual target selection algorithm for different types of galaxies, from local bright galaxies, luminous red galaxies, and emission line galaxies to high-redshift (2 < z < 5.5) Lyman-break galaxies. Using Fisher forecasts, we demonstrate that MUST can address fundamental questions in cosmology, including the nature of dark energy, tests of gravity theories, and investigations into primordial physics. This is the first paper in the series of science white papers for MUST, with subsequent developments focusing on additional scientific cases such as galaxy and quasar evolution, Milky Way physics, and dynamic phenomena in the time-domain Universe.
Submitted 13 November, 2024; v1 submitted 12 November, 2024; originally announced November 2024.
Comments: To be submitted to SCPMA
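
The Fisher forecasts mentioned above rest on the standard Gaussian Fisher matrix, F_ij = sum_b (dO_b/dtheta_i)(dO_b/dtheta_j) / sigma_b^2 for a binned observable with independent errors. A toy two-parameter example with made-up derivatives and uncertainties:

    # Minimal Gaussian Fisher-forecast sketch with fabricated numbers.
    import numpy as np

    dO_dtheta = np.array([[0.8, 0.1],     # rows: observable bins, columns: parameters
                          [0.5, 0.4],
                          [0.2, 0.9]])
    sigma = np.array([0.05, 0.04, 0.08])  # 1-sigma error per bin

    F = (dO_dtheta / sigma[:, None] ** 2).T @ dO_dtheta   # Fisher matrix
    cov = np.linalg.inv(F)                                # forecast parameter covariance
    print("1-sigma parameter forecasts:", np.sqrt(np.diag(cov)))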
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be submitted to SCPMA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07794">arXiv:2411.07794</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07794">pdf</a>, <a href="https://arxiv.org/format/2411.07794">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Feature Fusion Transferability Aware Transformer for Unsupervised Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yu%2C+X">Xiaowei Yu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhe Huang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Z">Zao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07794v1-abstract-short" style="display: inline;"> Unsupervised domain adaptation (UDA) aims to leverage the knowledge learned from labeled source domains to improve performance on the unlabeled target domains. While Convolutional Neural Networks (CNNs) have been dominant in previous UDA methods, recent research has shown promise in applying Vision Transformers (ViTs) to this task. In this study, we propose a novel Feature Fusion Transferability A&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07794v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07794v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07794v1-abstract-full" style="display: none;"> Unsupervised domain adaptation (UDA) aims to leverage the knowledge learned from labeled source domains to improve performance on the unlabeled target domains. While Convolutional Neural Networks (CNNs) have been dominant in previous UDA methods, recent research has shown promise in applying Vision Transformers (ViTs) to this task. In this study, we propose a novel Feature Fusion Transferability Aware Transformer (FFTAT) to enhance ViT performance in UDA tasks. Our method introduces two key innovations: First, we introduce a patch discriminator to evaluate the transferability of patches, generating a transferability matrix. We integrate this matrix into self-attention, directing the model to focus on transferable patches. Second, we propose a feature fusion technique to fuse embeddings in the latent space, enabling each embedding to incorporate information from all others, thereby improving generalization. These two components work in synergy to enhance feature representation learning. Extensive experiments on widely used benchmarks demonstrate that our method significantly improves UDA performance, achieving state-of-the-art (SOTA) results. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07794v1-abstract-full').style.display = 'none'; document.getElementById('2411.07794v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07683">arXiv:2411.07683</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07683">pdf</a>, <a href="https://arxiv.org/format/2411.07683">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Channel Modeling and Environment Reconstruction for Terahertz Monostatic Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Lyu%2C+Y">Yejian Lyu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zeyu Huang</a>, <a href="/search/?searchtype=author&amp;query=Schwarz%2C+S">Stefan Schwarz</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chong Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07683v1-abstract-short" style="display: inline;"> THz ISAC aims to integrate novel functionalities, such as positioning and environmental sensing, into communication systems. Accurate channel modeling is crucial for the design and performance evaluation of future ISAC systems. In this paper, a THz measurement campaign for monostatic sensing is presented. VNA-based channel measurements are conducted in a laboratory scenario, where the transmitter&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07683v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07683v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07683v1-abstract-full" style="display: none;"> THz ISAC aims to integrate novel functionalities, such as positioning and environmental sensing, into communication systems. Accurate channel modeling is crucial for the design and performance evaluation of future ISAC systems. In this paper, a THz measurement campaign for monostatic sensing is presented. VNA-based channel measurements are conducted in a laboratory scenario, where the transmitter and receiver are positioned together to mimic monostatic sensing. The centering frequency and measured bandwidth for these measurements are 300 GHz and 20 GHz, respectively. A DSS scheme is employed to capture spatial sensing channel profiles. Measurements are conducted across 28 transceiver locations arranged along an &#39;L&#39;-shaped route. Then, an element-wise SAGE algorithm is used to estimate the MPC parameters, i.e., amplitude and delay. 
Specular and diffuse reflections are analyzed based on geometric principles and the estimated MPC parameters, where the effects of the radiation pattern are observed. A geometry-based MPC trajectory tracking algorithm is then proposed to classify the MPCs and de-embed the effects of the radiation pattern. Following this algorithm, a hybrid channel model is proposed based on the de-embedded MPC parameters. In this hybrid channel model for monostatic sensing, the MPCs are categorized into target-related and environment-related components. The target-related components are utilized for target detection and identification, while the environment-related ones focus on geometrical scenario reconstruction. A demonstration of geometrical environment reconstruction, along with an analysis of reflection loss for target identification, is subsequently presented. This work offers valuable insights into THz monostatic sensing channel modeling and the design of future THz ISAC systems.
Submitted 12 November, 2024; originally announced November 2024.

arXiv:2411.07615 (https://arxiv.org/abs/2411.07615) [physics.chem-ph, physics.comp-ph, quant-ph]
Real-time propagation of adaptive sampling selected configuration interaction wave function
Authors: Avijit Shee, Zhen Huang, Martin Head-Gordon, K. Birgitta Whaley
Abstract: We have developed a new time propagation method, time-dependent adaptive sampling configuration interaction (TD-ASCI), to describe the dynamics of a strongly correlated system. We employ the short iterative Lanczos (SIL) method as the time integrator, which provides a unitary, norm-conserving, and stable long-time propagation scheme. We used the TD-ASCI method to evaluate the time-domain correlation functions of molecular systems. The accuracy of the correlation function was assessed by Fourier transforming (FT) into the frequency domain to compute the dipole-allowed absorption spectra. The FT has been carried out with a short-time signal of the correlation function to reduce the computation time, using an efficient alternative FT scheme based on the ESPRIT signal processing algorithm. We have applied the TD-ASCI method to prototypical strongly correlated molecular systems and compared the absorption spectra to spectra evaluated using the equation-of-motion coupled cluster (EOMCC) method with a truncation at the singles-doubles-triples (SDT) level.
Submitted 12 November, 2024; originally announced November 2024.
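
A toy version of the correlation-function-to-spectrum step follows: Fourier transforming a damped dipole autocorrelation signal recovers its excitation energies as spectral peaks. The signal here is synthetic, and a plain discrete transform on a frequency grid is used instead of the ESPRIT-based scheme described above.

    # Synthetic damped correlation function and its absorption-like line shape.
    import numpy as np

    dt = 0.05
    t = np.arange(0, 200, dt)                      # propagation time grid (arbitrary units)
    omegas = [0.35, 0.52]                          # toy excitation energies
    corr = sum(np.exp(-1j * w * t) for w in omegas) * np.exp(-t / 50.0)   # damped C(t)

    omega_grid = np.linspace(0.0, 1.0, 401)
    # spectrum ~ Re of the integral of C(t) exp(i w t) dt, evaluated as a Riemann sum
    spec = np.real((corr[None, :] * np.exp(1j * omega_grid[:, None] * t[None, :])).sum(axis=1) * dt)
    print("strongest peak at omega =", omega_grid[np.argmax(spec)])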

arXiv:2411.06702 (https://arxiv.org/abs/2411.06702) [cs.CV]
Track Any Peppers: Weakly Supervised Sweet Pepper Tracking Using VLMs
Authors: Jia Syuen Lim, Yadan Luo, Zhi Chen, Tianqi Wei, Scott Chapman, Zi Huang
Abstract: In the Detection and Multi-Object Tracking of Sweet Peppers Challenge, we present Track Any Peppers (TAP), a weakly supervised ensemble technique for sweet pepper tracking. TAP leverages the zero-shot detection capabilities of vision-language foundation models like Grounding DINO to automatically generate pseudo-labels for sweet peppers in video sequences with minimal human intervention. These pseudo-labels, refined when necessary, are used to train a YOLOv8 segmentation network. To enhance detection accuracy under challenging conditions, we incorporate pre-processing techniques such as relighting adjustments and apply depth-based filtering during post-inference. For object tracking, we integrate the Matching by Segment Anything (MASA) adapter with the BoT-SORT algorithm. Our approach achieves a HOTA score of 80.4%, MOTA of 66.1%, Recall of 74.0%, and Precision of 90.7%, demonstrating effective tracking of sweet peppers without extensive manual effort. This work highlights the potential of foundation models for efficient and accurate object detection and tracking in agricultural settings.
Submitted 10 November, 2024; originally announced November 2024.
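
The depth-based post-inference filtering mentioned above can be as simple as discarding detections whose median depth falls outside an expected range. A toy sketch with an illustrative box format, synthetic depth map, and thresholds, none of which come from the paper:

    # Keep only detections whose median depth lies in a plausible range.
    import numpy as np

    def filter_by_depth(boxes, depth_map, near=0.3, far=1.5):
        """boxes: list of (x1, y1, x2, y2) in pixels; depth_map in metres."""
        kept = []
        for (x1, y1, x2, y2) in boxes:
            patch = depth_map[y1:y2, x1:x2]
            if patch.size and near <= np.median(patch) <= far:
                kept.append((x1, y1, x2, y2))
        return kept

    depth = np.full((480, 640), 1.0)               # flat synthetic depth map, 1 m everywhere
    boxes = [(10, 10, 60, 80), (300, 200, 360, 280)]
    print(filter_by_depth(boxes, depth))           # both boxes survive the depth check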

arXiv:2411.06376 (https://arxiv.org/abs/2411.06376) [cs.LG, cs.AI, cs.AR]
Phantom: Constraining Generative Artificial Intelligence Models for Practical Domain Specific Peripherals Trace Synthesizing
Authors: Zhibai Huang, Yihan Shen, Yongchen Xie, Zhixiang Wei, Yun wang, Fangxin Liu, Tao Song, Zhengwei Qi
Abstract: Peripheral Component Interconnect Express (PCIe) is the de facto interconnect standard for high-speed peripherals and CPUs. Prototyping and optimizing PCIe devices for emerging scenarios is an ongoing challenge. Since Transaction Layer Packets (TLPs) capture device-CPU interactions, it is crucial to analyze and generate realistic TLP traces for effective device design and optimization. Generative AI offers a promising approach for creating intricate, custom TLP traces necessary for PCIe hardware and software development. However, existing models often generate impractical traces due to the absence of PCIe-specific constraints, such as TLP ordering and causality. This paper presents Phantom, the first framework that treats TLP trace generation as a generative AI problem while incorporating PCIe-specific constraints. We validate Phantom's effectiveness by generating TLP traces for an actual PCIe network interface card.
Experimental results show that Phantom produces practical, large-scale TLP traces, significantly outperforming existing models, with improvements of up to 1000× in task-specific metrics and up to 2.19× in Fréchet Inception Distance (FID) compared to backbone-only methods.
Submitted 10 November, 2024; originally announced November 2024.
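
The FID numbers above are Fréchet distances between Gaussians fitted to feature embeddings of real and generated data: ||mu1 - mu2||^2 + Tr(C1 + C2 - 2 (C1 C2)^(1/2)). A self-contained computation on random stand-in features:

    # Fréchet distance between Gaussians fitted to two feature sets.
    import numpy as np
    from scipy.linalg import sqrtm

    def frechet_distance(feat_a, feat_b):
        mu_a, mu_b = feat_a.mean(axis=0), feat_b.mean(axis=0)
        cov_a = np.cov(feat_a, rowvar=False)
        cov_b = np.cov(feat_b, rowvar=False)
        covmean = sqrtm(cov_a @ cov_b)
        if np.iscomplexobj(covmean):          # discard tiny numerical imaginary parts
            covmean = covmean.real
        diff = mu_a - mu_b
        return float(diff @ diff + np.trace(cov_a + cov_b - 2 * covmean))

    rng = np.random.default_rng(0)
    real_feats = rng.normal(0.0, 1.0, size=(500, 16))   # stand-in embeddings of real traces
    fake_feats = rng.normal(0.3, 1.1, size=(500, 16))   # stand-in embeddings of generated traces
    print(f"FID = {frechet_distance(real_feats, fake_feats):.3f}")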

arXiv:2411.06221 (https://arxiv.org/abs/2411.06221) [cs.CR, cs.AI, cs.SE]
Smart-LLaMA: Two-Stage Post-Training of Large Language Models for Smart Contract Vulnerability Detection and Explanation
Authors: Lei Yu, Shiqi Chen, Hang Yuan, Peng Wang, Zhirong Huang, Jingyuan Zhang, Chenjie Shen, Fengjun Zhang, Li Yang, Jiajia Ma
Abstract: With the rapid development of blockchain technology, smart contract security has become a critical challenge. Existing smart contract vulnerability detection methods face three main issues: (1) insufficient quality of datasets, lacking detailed explanations and precise vulnerability locations; (2) limited adaptability of large language models (LLMs) to the smart contract domain, as most LLMs are pre-trained on general text data but minimal smart contract-specific data; (3) lack of high-quality explanations for detected vulnerabilities, as existing methods focus solely on detection without clear explanations. These limitations hinder detection performance and make it harder for developers to understand and fix vulnerabilities quickly, potentially leading to severe financial losses. To address these problems, we propose Smart-LLaMA, an advanced detection method based on the LLaMA language model. First, we construct a comprehensive dataset covering four vulnerability types with labels, detailed explanations, and precise vulnerability locations. Second, we introduce Smart Contract-Specific Continual Pre-Training, using raw smart contract data to enable the LLM to learn smart contract syntax and semantics, enhancing its domain adaptability. Furthermore, we propose Explanation-Guided Fine-Tuning, which fine-tunes the LLM using paired vulnerable code and explanations, enabling both vulnerability detection and reasoned explanations. We evaluate explanation quality through LLM and human evaluation, focusing on Correctness, Completeness, and Conciseness. Experimental results show that Smart-LLaMA outperforms state-of-the-art baselines, with average improvements of 6.49% in F1 score and 3.78% in accuracy, while providing reliable explanations.
Submitted 9 November, 2024; originally announced November 2024.
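
Explanation-Guided Fine-Tuning pairs vulnerable code with its explanation as supervision. The sketch below shows one plausible way to serialize such pairs into instruction-tuning records; the field names, prompt template, and the example contract are assumptions, not the paper's dataset format.

    # Serialize (vulnerable code, explanation) pairs into instruction-tuning records.
    import json

    samples = [{
        "code": "function withdraw(uint amt) public { msg.sender.call{value: amt}(\"\"); balances[msg.sender] -= amt; }",
        "label": "reentrancy",
        "location": "line 1: external call before state update",
        "explanation": "The external call hands control to the caller before the balance is "
                       "reduced, so a malicious fallback can re-enter withdraw and drain funds.",
    }]

    def to_record(s):
        return {
            "instruction": "Detect any vulnerability in the smart contract and explain it.",
            "input": s["code"],
            "output": f"Vulnerability: {s['label']} ({s['location']}). {s['explanation']}",
        }

    with open("sft_data.jsonl", "w") as f:
        for s in samples:
            f.write(json.dumps(to_record(s)) + "\n")
    print(open("sft_data.jsonl").read())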

arXiv:2411.06172 (https://arxiv.org/abs/2411.06172) [cs.CR]
IDU-Detector: A Synergistic Framework for Robust Masquerader Attack Detection
Authors: Zilin Huang, Xiulai Li, Xinyi Cao, Ke Chen, Longjuan Wang, Logan Bo-Yee Liu
Abstract: In the digital age, users store personal data in corporate databases, making data security central to enterprise management. Given the extensive attack surface, assets face challenges like weak authentication, vulnerabilities, and malware. Attackers may exploit vulnerabilities to gain unauthorized access, masquerading as legitimate users. Such attacks can lead to privacy breaches, business disruption, financial losses, and reputational damage. Complex attack vectors blur the lines between insider and external threats. To address this, we introduce the IDU-Detector, integrating Intrusion Detection Systems (IDS) with User and Entity Behavior Analytics (UEBA). This integration monitors unauthorized access, bridges system gaps, ensures continuous monitoring, and enhances threat identification. Existing insider threat datasets lack depth and coverage of diverse attack vectors, which hinders detection technologies from addressing complex attack surfaces. We propose new, diverse datasets covering more attack scenarios, enhancing detection technologies. Testing our framework, the IDU-Detector achieved average accuracies of 98.96% and 99.12%. These results show effectiveness in detecting attacks, improving security and response speed, and providing higher asset safety assurance.
Submitted 9 November, 2024; originally announced November 2024.
arXiv:2411.05027 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); eess.IV (Image and Video Processing)
DOI: 10.1109/MGRS.2024.3483459
Generative Artificial Intelligence Meets Synthetic Aperture Radar: A Survey
Authors: Zhongling Huang, Xidan Zhang, Zuqian Tang, Feng Xu, Mihai Datcu, Junwei Han
Abstract: SAR images possess unique attributes that present challenges for both human observers and vision AI models to interpret, owing to their electromagnetic characteristics. The interpretation of SAR images encounters various hurdles, one of the primary obstacles being the data itself, including issues with both its quantity and quality. These challenges can be addressed with generative AI (GenAI) technologies, which have advanced rapidly and now enable the creation of text, photorealistic images, video, and material in other modalities. This paper aims to comprehensively investigate the intersection of GenAI and SAR. First, we illustrate the common data generation-based applications in the SAR field and compare them with computer vision tasks, analyzing their similarities, differences, and general challenges. Then, an overview of the latest GenAI models is systematically reviewed, including various basic models and their variations targeting the general challenges, together with the corresponding applications in the SAR domain. Specifically, we summarize the physical-model-based simulation approaches for SAR and analyze hybrid modeling methods that combine GenAI and interpretable models. The evaluation methods that have been or could be applied to SAR are also explored. Finally, the potential challenges and future prospects are discussed. To the best of our knowledge, this survey is the first exhaustive examination of the intersection of SAR and GenAI, encompassing a wide range of topics, including deep neural networks, physical models, computer vision, and SAR images. The resources of this survey are open-source at https://github.com/XAI4SAR/GenAIxSAR.
Submitted 4 November, 2024; originally announced November 2024.

arXiv:2411.03711 [pdf, other]
Subjects: eess.SP (Signal Processing)
Multi-Modal Intelligent Channel Modeling: A New Modeling Paradigm via Synesthesia of Machines
Authors: Lu Bai, Ziwei Huang, Mingran Sun, Xiang Cheng, Lizhen Cui
Abstract: In the future sixth-generation (6G) era, to support accurate localization sensing and efficient communication link establishment for intelligent agents, a comprehensive understanding of the surrounding environment and proper channel modeling are indispensable. Existing methods that solely exploit radio frequency (RF) communication information have difficulty achieving accurate channel modeling. Fortunately, multi-modal devices are deployed on intelligent agents to obtain environmental features, which can further assist channel modeling. Some research efforts have been devoted to utilizing multi-modal information to facilitate channel modeling, but a comprehensive review is still lacking. To fill this gap, we embark on an initial endeavor to review multi-modal intelligent channel modeling (MMICM) via Synesthesia of Machines (SoM). Compared with channel modeling approaches that solely utilize RF communication information, multi-modal information provides a more in-depth understanding of the propagation environment around the transceiver, thus facilitating more accurate channel modeling. First, this paper introduces existing channel modeling approaches from the perspective of their evolution. Then, we elaborate on and investigate recent advances in capturing typical channel characteristics and features, i.e., channel non-stationarity and consistency, by characterizing the mathematical, spatial, coupling, and mapping relationships. In addition, applications that can be supported by MMICM are summarized and analyzed. To corroborate the superiority of MMICM via SoM, we present simulation results and analysis. Finally, open issues and potential directions for MMICM are outlined from the perspectives of measurements, modeling, and applications.
Submitted 6 November, 2024; originally announced November 2024.
arXiv:2411.03672 [pdf]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Towards 3D Semantic Scene Completion for Autonomous Driving: A Meta-Learning Framework Empowered by Deformable Large-Kernel Attention and Mamba Model
Authors: Yansong Qu, Zilin Huang, Zihao Sheng, Tiantian Chen, Sikai Chen
Abstract: Semantic scene completion (SSC) is essential for achieving comprehensive perception in autonomous driving systems. However, existing SSC methods often overlook the high deployment costs in real-world applications. Traditional architectures, such as 3D Convolutional Neural Networks (3D CNNs) and self-attention mechanisms, face challenges in efficiently capturing long-range dependencies within 3D voxel grids, limiting their effectiveness. To address these issues, we introduce MetaSSC, a novel meta-learning-based framework for SSC that leverages deformable convolution, large-kernel attention, and the Mamba (D-LKA-M) model. Our approach begins with a voxel-based semantic segmentation (SS) pretraining task, aimed at exploring the semantics and geometry of incomplete regions while acquiring transferable meta-knowledge. Using simulated cooperative perception datasets, we supervise the perception training of a single vehicle using aggregated sensor data from multiple nearby connected autonomous vehicles (CAVs), generating richer and more comprehensive labels. This meta-knowledge is then adapted to the target domain through a dual-phase training strategy that does not add extra model parameters, enabling efficient deployment. To further enhance the model's capability to capture long-sequence relationships within 3D voxel grids, we integrate Mamba blocks with deformable convolution and large-kernel attention into the backbone network. Extensive experiments demonstrate that MetaSSC achieves state-of-the-art performance, significantly outperforming competing models while also reducing deployment costs.
Submitted 6 November, 2024; originally announced November 2024.
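The MetaSSC entry above combines large-kernel attention with convolutions over 3D voxel grids. A minimal PyTorch sketch of a large-kernel attention block in that spirit, using a depthwise large-kernel 3D convolution whose output gates the input features; the layer sizes and kernel choice are assumptions for illustration, not the paper's D-LKA-M design:

```python
import torch
import torch.nn as nn

class LargeKernelAttention3D(nn.Module):
    """Gate voxel features with context from a depthwise large-kernel 3D convolution."""
    def __init__(self, channels: int, kernel_size: int = 7):
        super().__init__()
        self.dw_large = nn.Conv3d(channels, channels, kernel_size,
                                  padding=kernel_size // 2, groups=channels)
        self.pw = nn.Conv3d(channels, channels, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        attn = self.pw(self.dw_large(x))    # long-range spatial context per channel
        return x * torch.sigmoid(attn)      # used as a multiplicative attention gate

voxels = torch.randn(1, 32, 16, 16, 16)     # (batch, channels, D, H, W)
print(LargeKernelAttention3D(32)(voxels).shape)  # torch.Size([1, 32, 16, 16, 16])
```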
arXiv:2411.03670 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Touchstone Benchmark: Are We on the Right Way for Evaluating AI Algorithms for Medical Segmentation?
Authors: Pedro R. A. S. Bassi, Wenxuan Li, Yucheng Tang, Fabian Isensee, Zifu Wang, Jieneng Chen, Yu-Cheng Chou, Yannick Kirchhoff, Maximilian Rokuss, Ziyan Huang, Jin Ye, Junjun He, Tassilo Wald, Constantin Ulrich, Michael Baumgartner, Saikat Roy, Klaus H. Maier-Hein, Paul Jaeger, Yiwen Ye, Yutong Xie, Jianpeng Zhang, Ziyang Chen, Yong Xia, Zhaohu Xing, Lei Zhu, et al. (28 additional authors not shown)
Abstract: How can we test AI performance? This question seems trivial, but it isn't. Standard benchmarks often have problems such as in-distribution and small-size test sets, oversimplified metrics, unfair comparisons, and short-term outcome pressure. As a consequence, good performance on standard benchmarks does not guarantee success in real-world scenarios. To address these problems, we present Touchstone, a large-scale collaborative segmentation benchmark of 9 types of abdominal organs. This benchmark is based on 5,195 training CT scans from 76 hospitals around the world and 5,903 testing CT scans from 11 additional hospitals. This diverse test set enhances the statistical significance of benchmark results and rigorously evaluates AI algorithms across various out-of-distribution scenarios. We invited 14 inventors of 19 AI algorithms to train their algorithms, while our team, as a third party, independently evaluated these algorithms on three test sets. In addition, we also evaluated pre-existing AI frameworks, which, unlike individual algorithms, are more flexible and can support different algorithms, including MONAI from NVIDIA, nnU-Net from DKFZ, and numerous other open-source frameworks. We are committed to expanding this benchmark to encourage more innovation of AI algorithms for the medical domain.
Submitted 6 November, 2024; originally announced November 2024.
Comments: Accepted to NeurIPS 2024.

arXiv:2411.03313 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Classification Done Right for Vision-Language Pre-Training
Authors: Zilong Huang, Qinghao Ye, Bingyi Kang, Jiashi Feng, Haoqi Fan
Abstract: We introduce SuperClass, a super simple classification method for vision-language pre-training on image-text data. Unlike its contrastive counterpart CLIP, which contrasts image features against a text encoder, SuperClass directly uses tokenized raw text as supervised classification labels, without the need for additional text filtering or selection. Because there is no text encoding as a contrastive target, SuperClass does not require a text encoder and does not need to maintain a large batch size as CLIP does. SuperClass demonstrates superior performance on various downstream tasks, including classic computer vision benchmarks and vision-language downstream tasks. We further explore the scaling behavior of SuperClass with respect to model size, training length, and data size, and report encouraging results and comparisons to CLIP. Code: https://github.com/x-cls/superclass
Submitted 6 November, 2024; v1 submitted 5 November, 2024; originally announced November 2024.
Comments: NeurIPS 2024.
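SuperClass, as summarized above, treats the tokenized caption itself as the set of classification labels. A minimal PyTorch sketch of that idea, turning a caption's tokens into a multi-hot target over the vocabulary and training image features with a binary cross-entropy loss; the toy vocabulary and the stand-in image encoder are placeholders, not the released implementation:

```python
import torch
import torch.nn as nn

vocab = {"a": 0, "dog": 1, "on": 2, "the": 3, "beach": 4, "cat": 5}
vocab_size, feat_dim = len(vocab), 128

def caption_to_multihot(caption: str) -> torch.Tensor:
    """Tokenized caption -> multi-hot classification target over the vocabulary."""
    target = torch.zeros(vocab_size)
    for tok in caption.lower().split():
        if tok in vocab:
            target[vocab[tok]] = 1.0
    return target

image_features = torch.randn(2, feat_dim)        # stand-in for a vision backbone's output
classifier = nn.Linear(feat_dim, vocab_size)     # one logit per vocabulary token
targets = torch.stack([caption_to_multihot("a dog on the beach"),
                       caption_to_multihot("the cat")])
loss = nn.BCEWithLogitsLoss()(classifier(image_features), targets)
loss.backward()                                   # gradients flow without any text encoder
```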
arXiv:2411.02445 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
WiCV@CVPR2024: The Thirteenth Women In Computer Vision Workshop at the Annual CVPR Conference
Authors: Asra Aslam, Sachini Herath, Ziqi Huang, Estefania Talavera, Deblina Bhattacharjee, Himangi Mittal, Vanessa Staderini, Mengwei Ren, Azade Farshad
Abstract: In this paper, we present the details of the Women in Computer Vision Workshop (WiCV 2024), organized alongside CVPR 2024 in Seattle, Washington, United States. WiCV aims to amplify the voices of underrepresented women in the computer vision community, fostering increased visibility in both academia and industry. We believe that such events play a vital role in addressing gender imbalances within the field. The annual WiCV@CVPR workshop offers (a) an opportunity for collaboration between researchers from minority groups, (b) mentorship for female junior researchers, (c) financial support to presenters to alleviate financial burdens, and (d) a diverse array of role models who can inspire younger researchers at the outset of their careers. We present a comprehensive report on the workshop program, historical trends from past WiCV@CVPR events, and a summary of statistics related to presenters, attendees, and sponsorship for the WiCV 2024 workshop.
Submitted 2 November, 2024; originally announced November 2024.
Comments: arXiv admin note: substantial text overlap with arXiv:2309.12768.

arXiv:2411.01893 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
A Global Depth-Range-Free Multi-View Stereo Transformer Network with Pose Embedding
Authors: Yitong Dong, Yijin Li, Zhaoyang Huang, Weikang Bian, Jingbo Liu, Hujun Bao, Zhaopeng Cui, Hongsheng Li, Guofeng Zhang
Abstract: In this paper, we propose a novel multi-view stereo (MVS) framework that does not rely on a depth-range prior. Unlike recent prior-free MVS methods that work in a pair-wise manner, our method considers all the source images simultaneously. Specifically, we introduce a Multi-view Disparity Attention (MDA) module to aggregate long-range context information within and across multi-view images. Considering the asymmetry of the epipolar disparity flow, the key to our method lies in accurately modeling multi-view geometric constraints. We integrate pose embedding to encapsulate information such as multi-view camera poses, providing implicit geometric constraints for multi-view disparity feature fusion dominated by attention. Additionally, we construct corresponding hidden states for each source image, since the observation quality of the same pixel in the reference frame differs significantly across source frames. We explicitly estimate the quality of the current pixel corresponding to sampled points on the epipolar line of the source image and dynamically update the hidden states through an uncertainty estimation module. Extensive results on the DTU dataset and the Tanks&Temple benchmark demonstrate the effectiveness of our method. The code is available at our project page: https://zju3dv.github.io/GD-PoseMVS/.
Submitted 4 November, 2024; originally announced November 2024.
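The entry above injects camera-pose information into attention through a pose embedding. A minimal NumPy sketch of one way to build such an embedding, flattening the relative rotation and translation between a reference and a source camera into a feature vector; the 4x4 world-to-camera convention and the flattening scheme are assumptions for illustration, not the paper's exact design:

```python
import numpy as np

def relative_pose_embedding(T_ref: np.ndarray, T_src: np.ndarray) -> np.ndarray:
    """Relative pose of the source camera w.r.t. the reference camera, flattened to a vector.

    T_ref, T_src: 4x4 world-to-camera extrinsic matrices.
    """
    T_rel = T_src @ np.linalg.inv(T_ref)        # maps reference-camera coordinates to source-camera coordinates
    R, t = T_rel[:3, :3], T_rel[:3, 3]
    return np.concatenate([R.reshape(-1), t])   # 12-dim raw pose feature, ready for a learned projection

# Identity reference camera and a source camera translated 0.2 m along x.
T_ref = np.eye(4)
T_src = np.eye(4)
T_src[0, 3] = 0.2
print(relative_pose_embedding(T_ref, T_src))    # identity rotation part followed by [0.2, 0, 0]
```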
arXiv:2411.01779 [pdf, other]
Subjects: cs.CR (Cryptography and Security)
TabSec: A Collaborative Framework for Novel Insider Threat Detection
Authors: Zilin Huang, Xiangyan Tang, Hongyu Li, Xinyi Cao, Jieren Cheng
Abstract: In the era of the Internet of Things (IoT) and data sharing, users frequently upload their personal information to enterprise databases to enjoy enhanced service experiences provided by various online services. However, the widespread presence of system vulnerabilities, remote network intrusions, and insider threats significantly increases the exposure of private enterprise data on the internet. If such data is stolen or leaked by attackers, it can result in severe asset losses and business operation disruptions. To address these challenges, this paper proposes a novel threat detection framework, TabITD. This framework integrates Intrusion Detection Systems (IDS) with User and Entity Behavior Analytics (UEBA) strategies to form a collaborative detection system that bridges the gaps in existing systems' capabilities. It effectively addresses the blurred boundaries between external and insider threats caused by the diversification of attack methods, thereby enhancing the model's learning ability and overall detection performance. Moreover, the proposed method leverages the TabNet architecture, which employs a sparse attention feature selection mechanism that allows TabNet to select the most relevant features at each decision step, thereby improving the detection of rare-class attacks. We evaluated our proposed solution on two different datasets, achieving average accuracies of 96.71% and 97.25%, respectively. The results demonstrate that this approach can effectively detect malicious behaviors such as masquerade attacks and external threats, significantly enhancing network security defenses and the efficiency of network attack detection.
Submitted 3 November, 2024; originally announced November 2024.

arXiv:2411.01600 [pdf, other]
Subjects: cs.LG (Machine Learning); physics.chem-ph (Chemical Physics); q-bio.QM (Quantitative Methods)
Graph Fourier Neural ODEs: Bridging Spatial and Temporal Multiscales in Molecular Dynamics
Authors: Fang Sun, Zijie Huang, Haixin Wang, Yadi Cao, Xiao Luo, Wei Wang, Yizhou Sun
Abstract: Molecular dynamics simulations are crucial for understanding complex physical, chemical, and biological processes at the atomic level. However, accurately capturing interactions across multiple spatial and temporal scales remains a significant challenge. We present a novel framework that jointly models spatial and temporal multiscale interactions in molecular dynamics. Our approach leverages Graph Fourier Transforms to decompose molecular structures into different spatial scales and employs Neural Ordinary Differential Equations to model the temporal dynamics in a curated manner influenced by the spatial modes. This unified framework links spatial structures with temporal evolution in a flexible manner, enabling more accurate and comprehensive simulations of molecular systems. We evaluate our model on the MD17 dataset, demonstrating consistent performance improvements over state-of-the-art baselines across multiple molecules, particularly under challenging conditions such as irregular timestep sampling and long-term prediction horizons. Ablation studies confirm the significant contributions of both the spatial and temporal multiscale modeling components. Our method advances the simulation of complex molecular systems, potentially accelerating research in computational chemistry, drug discovery, and materials science.
Submitted 3 November, 2024; originally announced November 2024.
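The Graph Fourier Neural ODE entry above decomposes a molecular graph into spatial scales with the graph Fourier transform. A minimal NumPy sketch of that transform, projecting node features onto the eigenvectors of the graph Laplacian so that low-frequency modes carry coarse structure and high-frequency modes carry fine detail; the toy 4-node chain graph is an illustrative assumption:

```python
import numpy as np

# Toy 4-node chain graph (e.g., a small molecular fragment).
A = np.array([[0, 1, 0, 0],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)
L = np.diag(A.sum(axis=1)) - A        # combinatorial graph Laplacian

eigvals, U = np.linalg.eigh(L)        # columns of U form the graph Fourier basis
X = np.random.randn(4, 3)             # per-node features (e.g., 3D coordinates)

X_hat = U.T @ X                       # graph Fourier transform: node domain -> spectral domain
X_low = U[:, :2] @ X_hat[:2]          # keep only the two lowest-frequency (coarsest) modes
X_recon = U @ X_hat                   # inverse transform recovers the original features

print(np.allclose(X_recon, X))        # True: the basis is orthogonal
print(eigvals)                        # 0 = constant mode; larger eigenvalues = finer spatial scales
```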
arXiv:2411.01215 [pdf, other]
Subjects: astro-ph.HE (High Energy Astrophysical Phenomena)
Detection of two TeV gamma-ray outbursts from NGC 1275 by LHAASO
Authors: Zhen Cao, F. Aharonian, Axikegu, Y. X. Bai, Y. W. Bao, D. Bastieri, X. J. Bi, Y. J. Bi, J. T. Cai, Q. Cao, W. Y. Cao, Zhe Cao, J. Chang, J. F. Chang, A. M. Chen, E. S. Chen, Liang Chen, Lin Chen, Long Chen, M. J. Chen, M. L. Chen, Q. H. Chen, S. H. Chen, S. Z. Chen, T. L. Chen, et al. (254 additional authors not shown)
Abstract: The Water Cherenkov Detector Array (WCDA) is one of the components of the Large High Altitude Air Shower Observatory (LHAASO) and can monitor any source over two-thirds of the sky for up to 7 hours per day with a duty cycle above 98%. In this work, we report the detection of two outbursts of the Fanaroff-Riley I radio galaxy NGC 1275 by LHAASO-WCDA between November 2022 and January 2023, with statistical significances of 5.2$\sigma$ and 8.3$\sigma$. The observed spectral energy distribution in the range from 500 GeV to 3 TeV is fitted by a power law with best-fit spectral indices of $\alpha = -3.37 \pm 0.52$ and $-3.35 \pm 0.29$, respectively. The outburst fluxes above 0.5 TeV were $(4.55 \pm 4.21)\times 10^{-11}~\rm cm^{-2}~s^{-1}$ and $(3.45 \pm 1.78)\times 10^{-11}~\rm cm^{-2}~s^{-1}$, corresponding to 60% and 45% of the Crab Nebula flux. Variability analysis reveals a variability time-scale of days in the TeV energy band. A simple one-zone synchrotron self-Compton model reproduces the gamma-ray data well.
Submitted 5 November, 2024; v1 submitted 2 November, 2024; originally announced November 2024.
Comments: 11 pages, 8 figures, 3 tables.
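For reference, the power-law fit quoted in the LHAASO abstract above corresponds to a differential photon spectrum of the usual form; the pivot energy $E_0$ and normalization $N_0$ are generic placeholders, since the abstract only reports the indices:

$$\frac{dN}{dE} = N_0 \left(\frac{E}{E_0}\right)^{\alpha}, \qquad 500~\mathrm{GeV} \le E \le 3~\mathrm{TeV},$$

$$\alpha = -3.37 \pm 0.52~\text{(first outburst)}, \qquad \alpha = -3.35 \pm 0.29~\text{(second outburst)}.$$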
arXiv:2411.01191 [pdf, other]
Subjects: cs.DS (Data Structures and Algorithms); cs.GT (Computer Science and Game Theory)
Prophet Secretary and Matching: the Significance of the Largest Item
Authors: Ziyun Chen, Zhiyi Huang, Dongchen Li, Zhihao Gavin Tang
Abstract: The prophet secretary problem is a combination of the prophet inequality and the secretary problem, where elements are drawn from known independent distributions and arrive in uniformly random order. In this work, we design 1) a $0.688$-competitive algorithm, which breaks the $0.675$ barrier of blind strategies (Correa, Saona, Ziliotto, 2021), and 2) a $0.641$-competitive algorithm for the prophet secretary matching problem, which breaks the $1-1/e\approx 0.632$ barrier for the first time. Our second result also applies to the query-commit model of weighted stochastic matching and improves the state-of-the-art ratio (Derakhshan and Farhadi, 2023).
Submitted 2 November, 2024; originally announced November 2024.
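As a quick check of the barrier mentioned in the abstract above, the classical bound $1 - 1/e$ evaluates to roughly 0.632, so both reported competitive ratios sit strictly above it:

$$1 - \frac{1}{e} = 1 - \frac{1}{2.71828\ldots} \approx 0.6321 < 0.641 < 0.688.$$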
arXiv:2410.22733 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
ETO: Efficient Transformer-based Local Feature Matching by Organizing Multiple Homography Hypotheses
Authors: Junjie Ni, Guofeng Zhang, Guanglin Li, Yijin Li, Xinyang Liu, Zhaoyang Huang, Hujun Bao
Abstract: We tackle the efficiency problem of learning local feature matching. Recent advancements have given rise to purely CNN-based and transformer-based approaches, each augmented with deep learning techniques. While CNN-based methods often excel in matching speed, transformer-based methods tend to provide more accurate matches. We propose an efficient transformer-based network architecture for local feature matching. The technique is built on constructing multiple homography hypotheses to approximate the continuous correspondence in the real world, and on uni-directional cross-attention to accelerate the refinement. On the YFCC100M dataset, our matching accuracy is competitive with LoFTR, a state-of-the-art transformer-based architecture, while our inference speed is boosted by a factor of four, even outperforming CNN-based methods. Comprehensive evaluations on other open datasets such as MegaDepth, ScanNet, and HPatches demonstrate our method's efficacy, highlighting its potential to significantly enhance a wide array of downstream applications.
Submitted 31 October, 2024; v1 submitted 30 October, 2024; originally announced October 2024.

arXiv:2410.22724 [pdf, ps, other]
Subjects: astro-ph.SR (Solar and Stellar Astrophysics)
Tracking an eruptive prominence using multiwavelength and multiview observations on 2023 March 7
Authors: Qingmin Zhang, Yudi Ou, Zhenghua Huang, Yongliang Song, Suli Ma
Abstract: In this paper, we carry out multiwavelength and multiview observations of a prominence eruption, which generated a C2.3-class flare and a coronal mass ejection (CME) on 2023 March 7. For the first time, we apply the revised cone model to the three-dimensional reconstruction and tracking of the eruptive prominence for ~4 hrs. The prominence propagates non-radially and makes a detour around the large-scale coronal loops in active region NOAA 13243. The northward deflection angle increases from ~36 degrees to ~47 degrees before returning to ~36 degrees and remaining there. There is no longitudinal deflection throughout the propagation. The angular width of the cone increases from ~30 degrees and reaches a plateau at ~37 degrees. The heliocentric distance of the prominence rises from ~1.1 to ~10.0 solar radii, and the prominence experiences continuous acceleration (~51 m/s^2) over two hours, which is probably related to magnetic reconnection during the C-class flare. The true speed of the CME front is estimated to be ~829 km/s, about 1.2 times that of the CME core (the prominence). It is concluded that both the acceleration and the deflection of eruptive prominences in their early lives can be reproduced with the revised cone model.
Submitted 30 October, 2024; originally announced October 2024.
Comments: 13 pages, 13 figures, accepted for publication in ApJ.
The heliocentric distance of the prominence rises from ~1.1 to ~10.0 solar radii, and the prominence experiences continuous acceleration (~51 m/s^2) over two hours, which is probably related to the magnetic reconnection during the C-class flare. The true speed of CME front is estimated to be ~829 km/s, which is ~1.2 times larger than that of CME core (prominence). It is concluded that both acceleration and deflection of eruptive prominences in their early lives could be reproduced with the revised cone model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22724v1-abstract-full').style.display = 'none'; document.getElementById('2410.22724v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 13 figures, accepted for publication in ApJ</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22353">arXiv:2410.22353</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22353">pdf</a>, <a href="https://arxiv.org/format/2410.22353">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RuleRAG: Rule-guided retrieval-augmented generation with language models for question answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Zhongwu Chen</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+C">Chengjin Xu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+D">Dingmin Wang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhen Huang</a>, <a href="/search/?searchtype=author&amp;query=Dou%2C+Y">Yong Dou</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+J">Jian Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22353v1-abstract-short" style="display: inline;"> Retrieval-augmented generation (RAG) framework has shown promising potential in knowledge-intensive question answering (QA) by retrieving external corpus and generating based on augmented context. 
However, existing approaches only consider the query itself, neither specifying the retrieval preferences for the retrievers nor informing the generators of how to refer to the retrieved documents for th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22353v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22353v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22353v1-abstract-full" style="display: none;"> Retrieval-augmented generation (RAG) framework has shown promising potential in knowledge-intensive question answering (QA) by retrieving external corpus and generating based on augmented context. However, existing approaches only consider the query itself, neither specifying the retrieval preferences for the retrievers nor informing the generators of how to refer to the retrieved documents for the answers, which poses a significant challenge to the QA performance. To address these issues, we propose Rule-Guided Retrieval-Augmented Generation with LMs, which explicitly introduces symbolic rules as demonstrations for in-context learning (RuleRAG-ICL) to guide retrievers to retrieve logically related documents in the directions of rules and uniformly guide generators to generate answers attributed by the guidance of the same set of rules. Moreover, the combination of queries and rules can be further used as supervised fine-tuning data to update retrievers and generators (RuleRAG-FT) to achieve better rule-based instruction following capability, leading to retrieve more supportive results and generate more acceptable answers. To emphasize the attribution of rules, we construct five rule-aware QA benchmarks, including three temporal and two static scenarios, and equip RuleRAG with several kinds of retrievers and generators. Experiments demonstrate that training-free RuleRAG-ICL effectively improves the retrieval quality of +89.2% in Recall@10 scores and generation accuracy of +103.1% in exact match scores over standard RAG on average across the five benchmarks, and further fine-tuned RuleRAG-FT consistently yields more significant performance enhancement. Extensive analyses indicate that RuleRAG scales well with increasing numbers of retrieved documents and exhibits generalization ability for untrained rules. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22353v1-abstract-full').style.display = 'none'; document.getElementById('2410.22353v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
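As an illustration of the rule-guided retrieval-and-generation idea summarized in the abstract above, here is a minimal, hypothetical sketch. The corpus, rule text, lexical scoring function, and prompt layout are all assumptions made for this example and are not the RuleRAG implementation; the sketch only shows the pattern of scoring documents against a rule-augmented query and prefixing the same rule to the generation prompt.

```python
# Illustrative sketch (not the RuleRAG code): retrieve documents that match both the
# query and a symbolic rule, then build a generation prompt that cites the same rule.
from collections import Counter

CORPUS = {  # toy corpus, purely for illustration
    "d1": "Paris is the capital of France.",
    "d2": "France is a country in Western Europe.",
    "d3": "The Eiffel Tower is located in Paris.",
}
RULE = "If X is the capital of Y, then X is a city in Y."  # hypothetical rule

def tokens(text):
    return [t.strip(".,?").lower() for t in text.split()]

def score(query, rule, doc):
    """Toy lexical relevance: overlap between the document and the rule-augmented query."""
    want = Counter(tokens(query) + tokens(rule))
    have = Counter(tokens(doc))
    return sum(min(want[t], have[t]) for t in want)

def retrieve(query, rule, k=2):
    """Rank documents by relevance to the query in the direction of the rule."""
    ranked = sorted(CORPUS, key=lambda d: score(query, rule, CORPUS[d]), reverse=True)
    return ranked[:k]

def build_prompt(query, rule, doc_ids):
    """Generation prompt that exposes the rule and the retrieved documents."""
    context = "\n".join(f"[{d}] {CORPUS[d]}" for d in doc_ids)
    return (f"Rule: {rule}\n"
            f"Documents:\n{context}\n"
            f"Question: {query}\n"
            f"Answer (follow the rule and cite documents):")

if __name__ == "__main__":
    q = "Which city is the capital of France?"
    docs = retrieve(q, RULE)
    print(build_prompt(q, RULE, docs))  # this prompt would be passed to an LM generator
```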
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20466">arXiv:2410.20466</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20466">pdf</a>, <a href="https://arxiv.org/format/2410.20466">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Guidance Disentanglement Network for Optics-Guided Thermal UAV Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhao%2C+Z">Zhicheng Zhao</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+J">Juanjuan Gu</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+C">Chenglong Li</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+C">Chun Wang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhongling Huang</a>, <a href="/search/?searchtype=author&amp;query=Tang%2C+J">Jin Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20466v1-abstract-short" style="display: inline;"> Optics-guided Thermal UAV image Super-Resolution (OTUAV-SR) has attracted significant research interest due to its potential applications in security inspection, agricultural measurement, and object detection. Existing methods often employ single guidance model to generate the guidance features from optical images to assist thermal UAV images super-resolution. However, single guidance models make&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20466v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20466v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20466v1-abstract-full" style="display: none;"> Optics-guided Thermal UAV image Super-Resolution (OTUAV-SR) has attracted significant research interest due to its potential applications in security inspection, agricultural measurement, and object detection. Existing methods often employ single guidance model to generate the guidance features from optical images to assist thermal UAV images super-resolution. However, single guidance models make it difficult to generate effective guidance features under favorable and adverse conditions in UAV scenarios, thus limiting the performance of OTUAV-SR. To address this issue, we propose a novel Guidance Disentanglement network (GDNet), which disentangles the optical image representation according to typical UAV scenario attributes to form guidance features under both favorable and adverse conditions, for robust OTUAV-SR. Moreover, we design an attribute-aware fusion module to combine all attribute-based optical guidance features, which could form a more discriminative representation and fit the attribute-agnostic guidance process. To facilitate OTUAV-SR research in complex UAV scenarios, we introduce VGTSR2.0, a large-scale benchmark dataset containing 3,500 aligned optical-thermal image pairs captured under diverse conditions and scenes. 
Extensive experiments on VGTSR2.0 demonstrate that GDNet significantly improves OTUAV-SR performance over state-of-the-art methods, especially in the challenging low-light and foggy environments commonly encountered in UAV scenarios. The dataset and code will be publicly available at https://github.com/Jocelyney/GDNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20466v1-abstract-full').style.display = 'none'; document.getElementById('2410.20466v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 19 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20451">arXiv:2410.20451</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20451">pdf</a>, <a href="https://arxiv.org/format/2410.20451">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BlinkVision: A Benchmark for Optical Flow, Scene Flow and Point Tracking Estimation using RGB Frames and Events </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yijin Li</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+Y">Yichen Shen</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhaoyang Huang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+S">Shuo Chen</a>, <a href="/search/?searchtype=author&amp;query=Bian%2C+W">Weikang Bian</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+X">Xiaoyu Shi</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+F">Fu-Yun Wang</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+K">Keqiang Sun</a>, <a href="/search/?searchtype=author&amp;query=Bao%2C+H">Hujun Bao</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+Z">Zhaopeng Cui</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+G">Guofeng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+H">Hongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20451v1-abstract-short" style="display: inline;"> Recent advances in event-based vision suggest that these systems complement traditional cameras by providing continuous observation without frame rate limitations and a high dynamic range, making them well-suited for correspondence tasks such as optical flow and point tracking. 
arXiv:2410.20451 [pdf, other] (https://arxiv.org/abs/2410.20451)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: BlinkVision: A Benchmark for Optical Flow, Scene Flow and Point Tracking Estimation using RGB Frames and Events
Authors: Yijin Li, Yichen Shen, Zhaoyang Huang, Shuo Chen, Weikang Bian, Xiaoyu Shi, Fu-Yun Wang, Keqiang Sun, Hujun Bao, Zhaopeng Cui, Guofeng Zhang, Hongsheng Li
Abstract: Recent advances in event-based vision suggest that these systems complement traditional cameras by providing continuous observation without frame rate limitations and a high dynamic range, making them well-suited for correspondence tasks such as optical flow and point tracking. However, there is still a lack of comprehensive benchmarks for correspondence tasks that include both event data and images. To address this gap, we propose BlinkVision, a large-scale and diverse benchmark with multiple modalities and dense correspondence annotations. BlinkVision offers several valuable features: 1) Rich modalities: It includes both event data and RGB images. 2) Extensive annotations: It provides dense per-pixel annotations covering optical flow, scene flow, and point tracking. 3) Large vocabulary: It contains 410 everyday categories, sharing common classes with popular 2D and 3D datasets like LVIS and ShapeNet. 4) Naturalistic: It delivers photorealistic data and covers various naturalistic factors, such as camera shake and deformation. BlinkVision enables extensive benchmarks on three types of correspondence tasks (optical flow, point tracking, and scene flow estimation) for both image-based and event-based methods, offering new observations, practices, and insights for future research. The benchmark website is https://www.blinkvision.net/.
Submitted 27 October, 2024; originally announced October 2024.
Comments: Accepted to ECCV 2024. Project Page: https://www.blinkvision.net/
arXiv:2410.19310 [pdf, other] (https://arxiv.org/abs/2410.19310)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.LG (Machine Learning); cs.MM (Multimedia)
Title: Flow Generator Matching
Authors: Zemin Huang, Zhengyang Geng, Weijian Luo, Guo-jun Qi
Abstract: In the realm of Artificial Intelligence Generated Content (AIGC), flow-matching models have emerged as a powerhouse, achieving success due to their robust theoretical underpinnings and solid ability for large-scale generative modeling. These models have demonstrated state-of-the-art performance, but their brilliance comes at a cost. The process of sampling from these models is notoriously demanding on computational resources, as it necessitates the use of multi-step numerical ordinary differential equations (ODEs). Against this backdrop, this paper presents a novel solution with theoretical guarantees in the form of Flow Generator Matching (FGM), an innovative approach designed to accelerate the sampling of flow-matching models into a one-step generation, while maintaining the original performance. On the CIFAR10 unconditional generation benchmark, our one-step FGM model achieves a new record Fréchet Inception Distance (FID) score of 3.08 among few-step flow-matching-based models, outperforming original 50-step flow-matching models. Furthermore, we use the FGM to distill the Stable Diffusion 3, a leading text-to-image flow-matching model based on the MM-DiT architecture. The resulting MM-DiT-FGM one-step text-to-image model demonstrates outstanding industry-level performance. When evaluated on the GenEval benchmark, MM-DiT-FGM has delivered remarkable generating qualities, rivaling other multi-step models in light of the efficiency of a single generation step.
Submitted 25 October, 2024; originally announced October 2024.

arXiv:2410.18982 [pdf, other] (https://arxiv.org/abs/2410.18982)
Subjects: cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Title: O1 Replication Journey: A Strategic Progress Report -- Part 1
Authors: Yiwei Qin, Xuefeng Li, Haoyang Zou, Yixiu Liu, Shijie Xia, Zhen Huang, Yixin Ye, Weizhe Yuan, Hector Liu, Yuanzhi Li, Pengfei Liu
Abstract: This paper introduces a pioneering approach to artificial intelligence research, embodied in our O1 Replication Journey. In response to the announcement of OpenAI's groundbreaking O1 model, we embark on a transparent, real-time exploration to replicate its capabilities while reimagining the process of conducting and communicating AI research. Our methodology addresses critical challenges in modern AI research, including the insularity of prolonged team-based projects, delayed information sharing, and the lack of recognition for diverse contributions. By providing comprehensive, real-time documentation of our replication efforts, including both successes and failures, we aim to foster open science, accelerate collective advancement, and lay the groundwork for AI-driven scientific discovery. Our research progress report diverges significantly from traditional research papers, offering continuous updates, full process transparency, and active community engagement throughout the research journey. Technologically, we proposed the journey learning paradigm, which encourages models to learn not just shortcuts, but the complete exploration process, including trial and error, reflection, and backtracking. With only 327 training samples and without any additional tricks, journey learning outperformed conventional supervised learning by over 8% on the MATH dataset, demonstrating its extremely powerful potential. We believe this to be the most crucial component of O1 technology that we have successfully decoded. We share valuable resources including technical hypotheses and insights, cognitive exploration maps, custom-developed tools, etc at https://github.com/GAIR-NLP/O1-Journey.
Submitted 8 October, 2024; originally announced October 2024.

arXiv:2410.18784 [pdf, ps, other] (https://arxiv.org/abs/2410.18784)
Subjects: cs.LG (Machine Learning); eess.SP (Signal Processing); math.NA (Numerical Analysis); math.ST (Statistics Theory); stat.ML (Machine Learning)
Title: Denoising diffusion probabilistic models are optimally adaptive to unknown low dimensionality
Authors: Zhihan Huang, Yuting Wei, Yuxin Chen
Abstract: The denoising diffusion probabilistic model (DDPM) has emerged as a mainstream generative model in generative AI. While sharp convergence guarantees have been established for the DDPM, the iteration complexity is, in general, proportional to the ambient data dimension, resulting in overly conservative theory that fails to explain its practical efficiency. This has motivated the recent work Li and Yan (2024a) to investigate how the DDPM can achieve sampling speed-ups through automatic exploitation of intrinsic low dimensionality of data. We strengthen this line of work by demonstrating, in some sense, optimal adaptivity to unknown low dimensionality. For a broad class of data distributions with intrinsic dimension $k$, we prove that the iteration complexity of the DDPM scales nearly linearly with $k$, which is optimal when using KL divergence to measure distributional discrepancy. Notably, our work is closely aligned with the independent concurrent work Potaptchik et al. (2024) -- posted two weeks prior to ours -- in establishing nearly linear-$k$ convergence guarantees for the DDPM.
Submitted 29 October, 2024; v1 submitted 24 October, 2024; originally announced October 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18433">arXiv:2410.18433</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18433">pdf</a>, <a href="https://arxiv.org/format/2410.18433">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Segmentation-aware Prior Assisted Joint Global Information Aggregated 3D Building Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Peng%2C+H">Hongxin Peng</a>, <a href="/search/?searchtype=author&amp;query=Liao%2C+Y">Yongjian Liao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+W">Weijun Li</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+C">Chuanyu Fu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+G">Guoxin Zhang</a>, <a href="/search/?searchtype=author&amp;query=Ding%2C+Z">Ziquan Ding</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zijie Huang</a>, <a href="/search/?searchtype=author&amp;query=Cao%2C+Q">Qiku Cao</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+S">Shuting Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18433v1-abstract-short" style="display: inline;"> Multi-View Stereo plays a pivotal role in civil engineering by facilitating 3D modeling, precise engineering surveying, quantitative analysis, as well as monitoring and maintenance. It serves as a valuable tool, offering high-precision and real-time spatial information crucial for various engineering projects. However, Multi-View Stereo algorithms encounter challenges in reconstructing weakly-text&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18433v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18433v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18433v1-abstract-full" style="display: none;"> Multi-View Stereo plays a pivotal role in civil engineering by facilitating 3D modeling, precise engineering surveying, quantitative analysis, as well as monitoring and maintenance. It serves as a valuable tool, offering high-precision and real-time spatial information crucial for various engineering projects. However, Multi-View Stereo algorithms encounter challenges in reconstructing weakly-textured regions within large-scale building scenes. In these areas, the stereo matching of pixels often fails, leading to inaccurate depth estimations. Based on the Segment Anything Model and RANSAC algorithm, we propose an algorithm that accurately segments weakly-textured regions and constructs their plane priors. These plane priors, combined with triangulation priors, form a reliable prior candidate set. Additionally, we introduce a novel global information aggregation cost function. This function selects optimal plane prior information based on global information in the prior candidate set, constrained by geometric consistency during the depth estimation update process. 
Experimental results on both the ETH3D benchmark dataset, aerial dataset, building dataset and real scenarios substantiate the superior performance of our method in producing 3D building models compared to other state-of-the-art methods. In summary, our work aims to enhance the completeness and density of 3D building reconstruction, carrying implications for broader applications in urban planning and virtual reality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18433v1-abstract-full').style.display = 'none'; document.getElementById('2410.18433v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18113">arXiv:2410.18113</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18113">pdf</a>, <a href="https://arxiv.org/format/2410.18113">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scalable Co-Clustering for Large-Scale Data through Dynamic Partitioning and Hierarchical Merging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wu%2C+Z">Zihan Wu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhaoke Huang</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+H">Hong Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18113v1-abstract-short" style="display: inline;"> Co-clustering simultaneously clusters rows and columns, revealing more fine-grained groups. However, existing co-clustering methods suffer from poor scalability and cannot handle large-scale data. This paper presents a novel and scalable co-clustering method designed to uncover intricate patterns in high-dimensional, large-scale datasets. Specifically, we first propose a large matrix partitioning&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18113v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18113v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18113v1-abstract-full" style="display: none;"> Co-clustering simultaneously clusters rows and columns, revealing more fine-grained groups. However, existing co-clustering methods suffer from poor scalability and cannot handle large-scale data. This paper presents a novel and scalable co-clustering method designed to uncover intricate patterns in high-dimensional, large-scale datasets. Specifically, we first propose a large matrix partitioning algorithm that partitions a large matrix into smaller submatrices, enabling parallel co-clustering. This method employs a probabilistic model to optimize the configuration of submatrices, balancing the computational efficiency and depth of analysis. 
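To make the plane-prior idea mentioned in the abstract above concrete, here is a minimal sketch of RANSAC plane fitting applied to 3D points assumed to come from one weakly-textured region (e.g., a region returned by a segmentation model). The synthetic point set, thresholds, and iteration count are assumptions for illustration only and do not reproduce the paper's pipeline.

```python
# Illustrative sketch: RANSAC plane fitting over 3D points from one (assumed)
# segmented weakly-textured region, yielding a plane prior (unit normal n, offset d)
# with n . p + d = 0 for points p on the plane.
import numpy as np

def fit_plane(p0, p1, p2):
    """Plane through three points; returns (unit normal, offset) or None if degenerate."""
    n = np.cross(p1 - p0, p2 - p0)
    norm = np.linalg.norm(n)
    if norm < 1e-12:
        return None
    n = n / norm
    return n, -float(n @ p0)

def ransac_plane(points, iters=500, thresh=0.05, seed=0):
    """Return the plane (n, d) supported by the most inliers within `thresh`."""
    rng = np.random.default_rng(seed)
    best, best_inliers = None, 0
    for _ in range(iters):
        idx = rng.choice(len(points), size=3, replace=False)
        plane = fit_plane(*points[idx])
        if plane is None:
            continue
        n, d = plane
        inliers = int(np.sum(np.abs(points @ n + d) < thresh))
        if inliers > best_inliers:
            best, best_inliers = (n, d), inliers
    return best, best_inliers

if __name__ == "__main__":
    rng = np.random.default_rng(1)
    # Synthetic "wall": points near the plane z = 0.2*x + 0.1*y + 1, plus random outliers.
    xy = rng.uniform(-5, 5, size=(400, 2))
    z = 0.2 * xy[:, 0] + 0.1 * xy[:, 1] + 1 + rng.normal(0, 0.02, 400)
    wall = np.column_stack([xy, z])
    outliers = rng.uniform(-5, 5, size=(60, 3))
    (n, d), support = ransac_plane(np.vstack([wall, outliers]))
    print("plane prior:", n, d, "inliers:", support)
```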
arXiv:2410.18113 [pdf, other] (https://arxiv.org/abs/2410.18113)
Subjects: cs.DC (Distributed, Parallel, and Cluster Computing); cs.LG (Machine Learning)
Title: Scalable Co-Clustering for Large-Scale Data through Dynamic Partitioning and Hierarchical Merging
Authors: Zihan Wu, Zhaoke Huang, Hong Yan
Abstract: Co-clustering simultaneously clusters rows and columns, revealing more fine-grained groups. However, existing co-clustering methods suffer from poor scalability and cannot handle large-scale data. This paper presents a novel and scalable co-clustering method designed to uncover intricate patterns in high-dimensional, large-scale datasets. Specifically, we first propose a large matrix partitioning algorithm that partitions a large matrix into smaller submatrices, enabling parallel co-clustering. This method employs a probabilistic model to optimize the configuration of submatrices, balancing the computational efficiency and depth of analysis. Additionally, we propose a hierarchical co-cluster merging algorithm that efficiently identifies and merges co-clusters from these submatrices, enhancing the robustness and reliability of the process. Extensive evaluations validate the effectiveness and efficiency of our method. Experimental results demonstrate a significant reduction in computation time, with an approximate 83% decrease for dense matrices and up to 30% for sparse matrices.
Submitted 9 October, 2024; originally announced October 2024.
Comments: 8 pages, 2 figures
MSC Class: H.2.8
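The partition-then-parallel pattern described in the abstract above can be sketched roughly as follows. The fixed block grid, the use of scikit-learn's SpectralCoclustering on each block, and the omission of the hierarchical merging step are all simplifications assumed for this example; this is not the paper's algorithm.

```python
# Rough sketch: split a large matrix into row/column blocks and co-cluster the
# blocks in parallel. The hierarchical merging of per-block co-clusters is omitted.
import numpy as np
from joblib import Parallel, delayed
from sklearn.cluster import SpectralCoclustering

def blocks(X, row_parts=2, col_parts=2):
    """Yield (row_block_id, col_block_id, submatrix) tiles covering X."""
    for i, rows in enumerate(np.array_split(np.arange(X.shape[0]), row_parts)):
        for j, cols in enumerate(np.array_split(np.arange(X.shape[1]), col_parts)):
            yield i, j, X[np.ix_(rows, cols)]

def cocluster_block(i, j, sub, n_clusters=3):
    """Co-cluster one submatrix and return its row/column labels."""
    model = SpectralCoclustering(n_clusters=n_clusters, random_state=0).fit(sub)
    return i, j, model.row_labels_, model.column_labels_

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # Positive entries keep the spectral co-clustering scaling well behaved.
    X = np.abs(rng.normal(size=(200, 120))) + 0.1
    results = Parallel(n_jobs=2)(
        delayed(cocluster_block)(i, j, sub) for i, j, sub in blocks(X)
    )
    for i, j, row_labels, col_labels in results:
        print(f"block ({i},{j}): {len(set(row_labels))} row clusters, "
              f"{len(set(col_labels))} column clusters")
```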
arXiv:2410.17955 [pdf] (https://arxiv.org/abs/2410.17955)
Subjects: physics.optics (Optics)
Title: A new exceptional point condition for coupled microresonators with coupled mode theory in space
Authors: Kunpeng Zhu, Xiaoyan Zhou, Yinxin Zhang, Zhanhua Huang, Lin Zhang
Abstract: We derive new exceptional point (EP) conditions of the coupled microring resonators using coupled mode theory in space, a more accurate approach than the commonly used coupled mode theory in time. Transmission spectra around EPs obtained from the two models have been compared on two material platforms, revealing non-negligible deviations. Our analysis provides a guide for accurately determining parameter sets of coupled microrings at EPs and deepens our understanding on parity-time-symmetric coupled resonators at EPs.
Submitted 23 October, 2024; originally announced October 2024.

arXiv:2410.17941 [pdf, other] (https://arxiv.org/abs/2410.17941)
Subjects: cs.LG (Machine Learning)
Title: Spiking Graph Neural Network on Riemannian Manifolds
Authors: Li Sun, Zhenhao Huang, Qiqi Wan, Hao Peng, Philip S. Yu
Abstract: Graph neural networks (GNNs) have become the dominant solution for learning on graphs, the typical non-Euclidean structures. Conventional GNNs, constructed with the Artificial Neuron Network (ANN), have achieved impressive performance at the cost of high computation and energy consumption. In parallel, spiking GNNs with brain-like spiking neurons are drawing increasing research attention owing to the energy efficiency. So far, existing spiking GNNs consider graphs in Euclidean space, ignoring the structural geometry, and suffer from the high latency issue due to Back-Propagation-Through-Time (BPTT) with the surrogate gradient. In light of the aforementioned issues, we are devoted to exploring spiking GNN on Riemannian manifolds, and present a Manifold-valued Spiking GNN (MSG). In particular, we design a new spiking neuron on geodesically complete manifolds with the diffeomorphism, so that BPTT regarding the spikes is replaced by the proposed differentiation via manifold. Theoretically, we show that MSG approximates a solver of the manifold ordinary differential equation. Extensive experiments on common graphs show the proposed MSG achieves superior performance to previous spiking GNNs and energy efficiency to conventional GNNs.
Submitted 23 October, 2024; originally announced October 2024.
Comments: Accepted by NeurIPS 2024, 30 pages
arXiv:2410.17741 [pdf, other] (https://arxiv.org/abs/2410.17741)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
DOI: 10.1016/j.patcog.2024.110758 (https://doi.org/10.1016/j.patcog.2024.110758)
Title: Efficient Neural Implicit Representation for 3D Human Reconstruction
Authors: Zexu Huang, Sarah Monazam Erfani, Siying Lu, Mingming Gong
Abstract: High-fidelity digital human representations are increasingly in demand in the digital world, particularly for interactive telepresence, AR/VR, 3D graphics, and the rapidly evolving metaverse. Even though they work well in small spaces, conventional methods for reconstructing 3D human motion frequently require the use of expensive hardware and have high processing costs. This study presents HumanAvatar, an innovative approach that efficiently reconstructs precise human avatars from monocular video sources. At the core of our methodology, we integrate the pre-trained HuMoR, a model celebrated for its proficiency in human motion estimation. This is adeptly fused with the cutting-edge neural radiance field technology, Instant-NGP, and the state-of-the-art articulated model, Fast-SNARF, to enhance the reconstruction fidelity and speed. By combining these two technologies, a system is created that can render quickly and effectively while also providing estimation of human pose parameters that are unmatched in accuracy. We have enhanced our system with an advanced posture-sensitive space reduction technique, which optimally balances rendering quality with computational efficiency. In our detailed experimental analysis using both artificial and real-world monocular videos, we establish the advanced performance of our approach. HumanAvatar consistently equals or surpasses contemporary leading-edge reconstruction techniques in quality. Furthermore, it achieves these complex reconstructions in minutes, a fraction of the time typically required by existing methods. Our models achieve a training speed that is 110X faster than that of State-of-The-Art (SoTA) NeRF-based models. Our technique performs noticeably better than SoTA dynamic human NeRF methods if given an identical runtime limit. HumanAvatar can provide effective visuals after only 30 seconds of training.
Submitted 23 October, 2024; originally announced October 2024.
Journal ref: Pattern Recognition, Vol. 156, 2024, Article No. 110758

arXiv:2410.17084 [pdf, other] (https://arxiv.org/abs/2410.17084)
Subjects: cs.RO (Robotics); eess.IV (Image and Video Processing)
Title: GS-LIVM: Real-Time Photo-Realistic LiDAR-Inertial-Visual Mapping with Gaussian Splatting
Authors: Yusen Xie, Zhenmin Huang, Jin Wu, Jun Ma
Abstract: In this paper, we introduce GS-LIVM, a real-time photo-realistic LiDAR-Inertial-Visual mapping framework with Gaussian Splatting tailored for outdoor scenes. Compared to existing methods based on Neural Radiance Fields (NeRF) and 3D Gaussian Splatting (3DGS), our approach enables real-time photo-realistic mapping while ensuring high-quality image rendering in large-scale unbounded outdoor environments. In this work, Gaussian Process Regression (GPR) is employed to mitigate the issues resulting from sparse and unevenly distributed LiDAR observations. The voxel-based 3D Gaussians map representation facilitates real-time dense mapping in large outdoor environments with acceleration governed by custom CUDA kernels. Moreover, the overall framework is designed in a covariance-centered manner, where the estimated covariance is used to initialize the scale and rotation of 3D Gaussians, as well as update the parameters of the GPR. We evaluate our algorithm on several outdoor datasets, and the results demonstrate that our method achieves state-of-the-art performance in terms of mapping efficiency and rendering quality. The source code is available on GitHub.
Submitted 18 October, 2024; originally announced October 2024.
Comments: 15 pages, 13 figures
arXiv:2410.16794 [pdf, other] (https://arxiv.org/abs/2410.16794)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: One-Step Diffusion Distillation through Score Implicit Matching
Authors: Weijian Luo, Zemin Huang, Zhengyang Geng, J. Zico Kolter, Guo-jun Qi
Abstract: Despite their strong performances on many generative tasks, diffusion models require a large number of sampling steps in order to generate realistic samples. This has motivated the community to develop effective methods to distill pre-trained diffusion models into more efficient models, but these methods still typically require few-step inference or perform substantially worse than the underlying model. In this paper, we present Score Implicit Matching (SIM), a new approach to distilling pre-trained diffusion models into single-step generator models, while maintaining almost the same sample generation ability as the original model as well as being data-free with no need of training samples for distillation. The method rests upon the fact that, although the traditional score-based loss is intractable to minimize for generator models, under certain conditions we can efficiently compute the gradients for a wide class of score-based divergences between a diffusion model and a generator. SIM shows strong empirical performances for one-step generators: on the CIFAR10 dataset, it achieves an FID of 2.06 for unconditional generation and 1.96 for class-conditional generation. Moreover, by applying SIM to a leading transformer-based diffusion model, we distill a single-step generator for text-to-image (T2I) generation that attains an aesthetic score of 6.42 with no performance decline over the original multi-step counterpart, clearly outperforming the other one-step generators including SDXL-TURBO of 5.33, SDXL-LIGHTNING of 5.34 and HYPER-SDXL of 5.85. We will release this industry-ready one-step transformer-based T2I generator along with this paper.
Submitted 22 October, 2024; originally announced October 2024.
Comments: Accepted by NeurIPS 2024
Journal ref: NeurIPS 2024
<div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 
47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10