
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 224 results for author: <span class="mathjax">Guo, R</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Guo%2C+R">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Guo, R"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Guo%2C+R&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Guo, R"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+R&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06398">arXiv:2502.06398</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06398">pdf</a>, <a href="https://arxiv.org/format/2502.06398">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning Counterfactual Outcomes Under Rank Preservation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+P">Peng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoxuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+C">Chunyuan Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiawei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruocheng Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06398v1-abstract-short" style="display: inline;"> Counterfactual inference aims to estimate the counterfactual outcome at the individual level given knowledge of an observed treatment and the factual outcome, with broad applications in fields such as epidemiology, econometrics, and management science. 
Abstract: Counterfactual inference aims to estimate the counterfactual outcome at the individual level given knowledge of an observed treatment and the factual outcome, with broad applications in fields such as epidemiology, econometrics, and management science. Previous methods rely on a known structural causal model (SCM) or assume the homogeneity of the exogenous variable and strict monotonicity between the outcome and exogenous variable. In this paper, we propose a principled approach for identifying and estimating the counterfactual outcome. We first introduce a simple and intuitive rank preservation assumption to identify the counterfactual outcome without relying on a known structural causal model. Building on this, we propose a novel ideal loss for theoretically unbiased learning of the counterfactual outcome and further develop a kernel-based estimator for its empirical estimation. Our theoretical analysis shows that the rank preservation assumption is not stronger than the homogeneity and strict monotonicity assumptions, that the proposed ideal loss is convex, and that the proposed estimator is unbiased. Extensive semi-synthetic and real-world experiments are conducted to demonstrate the effectiveness of the proposed method.
Submitted 10 February, 2025; originally announced February 2025.
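The rank-preservation idea can be illustrated with a simple quantile-mapping construction: if an individual's factual outcome sits at quantile q of the outcome distribution under the received treatment, the counterfactual outcome is read off at the same quantile under the other treatment. The sketch below is only that intuition, not the paper's kernel-based estimator; the omission of covariates, the empirical-CDF construction, and all names are assumptions.

```python
import numpy as np

def counterfactual_by_rank_preservation(y_factual, treatment, y_obs, t_obs):
    """Quantile-mapping illustration of rank preservation (assumed setup).

    y_obs, t_obs: observed outcomes and binary treatments from a sample.
    y_factual, treatment: the individual's factual outcome and treatment.
    Returns an estimate of the outcome under the opposite treatment.
    """
    y_t = np.sort(y_obs[t_obs == treatment])        # outcomes under the factual arm
    y_cf = np.sort(y_obs[t_obs == 1 - treatment])   # outcomes under the counterfactual arm
    # Rank (empirical quantile) of the factual outcome within its own arm.
    q = np.searchsorted(y_t, y_factual, side="right") / len(y_t)
    # Rank preservation: read off the same quantile in the other arm.
    idx = min(int(q * len(y_cf)), len(y_cf) - 1)
    return y_cf[idx]

# Toy usage with synthetic data (purely illustrative).
rng = np.random.default_rng(0)
t_obs = rng.integers(0, 2, size=1000)
y_obs = 1.0 * t_obs + rng.normal(size=1000)
print(counterfactual_by_rank_preservation(0.3, 0, y_obs, t_obs))  # roughly 1.3 under this toy model
```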
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15406">arXiv:2501.15406</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15406">pdf</a>, <a href="https://arxiv.org/format/2501.15406">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> A Token-FCM based risk assessment method for complex engineering designs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Guan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Y">Yimin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Rongbin Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yusheng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Q">Qiang Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15406v1-abstract-short" style="display: inline;"> Engineering design risks could cause unaffordable losses, and thus risk assessment plays a critical role in engineering design. On the other hand, the high complexity of modern engineering designs makes it difficult to assess risks effectively and accurately due to the complex two-way, dynamic causal-effect risk relations in engineering designs. To address this problem, this paper proposes a new r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15406v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15406v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15406v1-abstract-full" style="display: none;"> Engineering design risks could cause unaffordable losses, and thus risk assessment plays a critical role in engineering design. On the other hand, the high complexity of modern engineering designs makes it difficult to assess risks effectively and accurately due to the complex two-way, dynamic causal-effect risk relations in engineering designs. To address this problem, this paper proposes a new risk assessment method called token fuzzy cognitive map (Token-FCM). Its basic idea is to model the two-way causal-risk relations with the FCM method, and then augment FCM with a token mechanism to model the dynamics in causal-effect risk relations. Furthermore, the fuzzy sets and the group decision-making method are introduced to initialize the Token-FCM method so that comprehensive and accurate risk assessments can be attained. The effectiveness of the proposed method has been demonstrated by a real example of engine design for a horizontal directional drilling machine. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15406v1-abstract-full').style.display = 'none'; document.getElementById('2501.15406v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15275">arXiv:2501.15275</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15275">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> A Tale of Two Sides of Wafer: Physical Implementation and Block-Level PPA on Flip FET with Dual-sided Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Haoran Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+X">Xun Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Chu%2C+Y">Yanbang Chu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Ziqiao Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Rui Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+W">Wanyue Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yibo Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Runsheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Heng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+R">Ru Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15275v1-abstract-short" style="display: inline;"> As the conventional scaling of logic devices comes to an end, functional wafer backside and 3D transistor stacking are consensus for next-generation logic technology, offering considerable design space extension for powers, signals or even devices on the wafer backside. The Flip FET (FFET), a novel transistor architecture combining 3D transistor stacking and fully functional wafer backside, was re&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15275v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15275v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15275v1-abstract-full" style="display: none;"> As the conventional scaling of logic devices comes to an end, functional wafer backside and 3D transistor stacking are consensus for next-generation logic technology, offering considerable design space extension for powers, signals or even devices on the wafer backside. The Flip FET (FFET), a novel transistor architecture combining 3D transistor stacking and fully functional wafer backside, was recently proposed. With symmetric dual-sided standard cell design, the FFET can deliver around 12.5% cell area scaling and faster but more energy-efficient libraries beyond other stacked transistor technologies such as CFET. Besides, thanks to the novel cell design with dual-sided pins, the FFET supports dual-sided signal routing, delivering better routability and larger backside design space. 
In this work, we demonstrate for the first time a comprehensive FFET evaluation framework covering physical implementation and block-level power-performance-area (PPA) assessment, whose key functions are dual-sided routing and dual-sided RC extraction. A 32-bit RISC-V core is used for the evaluation. Compared to the CFET with single-sided signals, the FFET with single-sided signals achieves 23.3% post-P&R core-area reduction, 25.0% higher frequency and 11.9% lower power at the same utilization, and 16.0% higher frequency at the same core area. Meanwhile, the FFET supports dual-sided signals, which can benefit further from the flexible allocation of cell input pins on both sides. By optimizing the input-pin density and the number of BEOL routing layers on each side, a 10.6% frequency gain is realized without power degradation compared to single-sided signal routing. Moreover, the routability and power efficiency of the FFET barely degrade even when the routing layer count is reduced from 12 to 5 on each side, validating the ample room for cost-friendly design enabled by the FFET.
Submitted 25 January, 2025; originally announced January 2025.
Comments: Accepted by DATE 2025
Journal ref: Proc. of DATE 2025

4. arXiv:2501.14548 [pdf, other]  cs.CV
Large-scale and Fine-grained Vision-language Pre-training for Enhanced CT Image Understanding
Authors: Zhongyi Shui, Jianpeng Zhang, Weiwei Cao, Sinuo Wang, Ruizhe Guo, Le Lu, Lin Yang, Xianghua Ye, Tingbo Liang, Qi Zhang, Ling Zhang
Abstract: Artificial intelligence (AI) shows great potential in assisting radiologists to improve the efficiency and accuracy of medical image interpretation and diagnosis. However, a versatile AI model requires large-scale data and comprehensive annotations, which are often impractical in medical settings. Recent studies leverage radiology reports as naturally high-quality supervision for medical images, using contrastive language-image pre-training (CLIP) to develop language-informed models for radiological image interpretation. Nonetheless, these approaches typically contrast entire images with reports, neglecting the local associations between imaging regions and report sentences, which may undermine model performance and interpretability. In this paper, we propose a fine-grained vision-language model (fVLM) for anatomy-level CT image interpretation. Specifically, we explicitly match anatomical regions of CT images with the corresponding descriptions in radiology reports and perform contrastive pre-training for each anatomy individually.
Fine-grained alignment, however, faces considerable false-negative challenges, mainly from the abundance of anatomy-level healthy samples and similarly diseased abnormalities. To tackle this issue, we propose identifying false negatives of both normal and abnormal samples and calibrating contrastive learning from patient-level to disease-aware pairing. We curated the largest CT dataset to date, comprising imaging and report data from 69,086 patients, and conducted a comprehensive evaluation of 54 major and important disease diagnosis tasks across 15 main anatomies. Experimental results demonstrate the substantial potential of fVLM in versatile medical image interpretation. In the zero-shot classification task, we achieved an average AUC of 81.3% on 54 diagnosis tasks, surpassing CLIP and supervised methods by 12.9% and 8.0%, respectively.
Submitted 24 January, 2025; originally announced January 2025.
Comments: Accepted by ICLR 2025
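The anatomy-level contrastive pre-training described above can be pictured as a CLIP-style InfoNCE loss applied per anatomy; the sketch below assumes region and sentence embeddings are already computed, and the temperature and symmetric loss form are standard CLIP choices rather than details taken from the paper (the false-negative calibration is not shown).

```python
import torch
import torch.nn.functional as F

def anatomy_contrastive_loss(img_emb, txt_emb, temperature=0.07):
    """CLIP-style InfoNCE over one anatomy: row i of img_emb is the embedding
    of anatomy-region i, row i of txt_emb is the embedding of the matching
    report sentence. Positives are the diagonal pairs."""
    img = F.normalize(img_emb, dim=-1)
    txt = F.normalize(txt_emb, dim=-1)
    logits = img @ txt.t() / temperature
    targets = torch.arange(img.size(0), device=img.device)
    # Symmetric image-to-text and text-to-image losses.
    return 0.5 * (F.cross_entropy(logits, targets) +
                  F.cross_entropy(logits.t(), targets))

# Toy usage with random embeddings for one anatomy (batch of 8, dim 256).
loss = anatomy_contrastive_loss(torch.randn(8, 256), torch.randn(8, 256))
print(loss.item())
```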

5. arXiv:2501.13989 [pdf, other]  cs.LG; cs.AI
FreEformer: Frequency Enhanced Transformer for Multivariate Time Series Forecasting
Authors: Wenzhen Yue, Yong Liu, Xianghua Ying, Bowei Xing, Ruohao Guo, Ji Shi
Abstract: This paper presents FreEformer, a simple yet effective model that leverages a Frequency Enhanced Transformer for multivariate time series forecasting. Our work is based on the assumption that the frequency spectrum provides a global perspective on the composition of series across various frequencies and is highly suitable for robust representation learning. Specifically, we first convert time series into the complex frequency domain using the Discrete Fourier Transform (DFT). The Transformer architecture is then applied to the frequency spectra to capture cross-variate dependencies, with the real and imaginary parts processed independently. However, we observe that the vanilla attention matrix exhibits a low-rank characteristic, thus limiting representation diversity. This could be attributed to the inherent sparsity of the frequency domain and the strong-value-focused nature of Softmax in vanilla attention. To address this, we enhance the vanilla attention mechanism by introducing an additional learnable matrix to the original attention matrix, followed by row-wise L1 normalization. Theoretical analysis demonstrates that this enhanced attention mechanism improves both feature diversity and gradient flow. Extensive experiments demonstrate that FreEformer consistently outperforms state-of-the-art models on eighteen real-world benchmarks covering electricity, traffic, weather, healthcare and finance. Notably, the enhanced attention mechanism also consistently improves the performance of state-of-the-art Transformer-based forecasters.
Submitted 23 January, 2025; originally announced January 2025.
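The enhanced attention can be sketched directly from the abstract's wording: a learnable matrix is added to the attention matrix, followed by row-wise L1 normalization. Tensor shapes, initialization, and the exact placement relative to the softmax are assumptions here, and the surrounding frequency-domain pipeline is omitted.

```python
import torch
import torch.nn as nn

class EnhancedAttention(nn.Module):
    """Vanilla softmax attention plus a learnable matrix, then row-wise L1
    normalization, as described in the FreEformer abstract (shapes assumed)."""

    def __init__(self, dim, n_tokens):
        super().__init__()
        self.qkv = nn.Linear(dim, 3 * dim)
        self.proj = nn.Linear(dim, dim)
        # Learnable matrix added to the attention matrix (one entry per token pair).
        self.extra = nn.Parameter(torch.zeros(n_tokens, n_tokens))

    def forward(self, x):                      # x: (batch, n_tokens, dim)
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        attn = torch.softmax(q @ k.transpose(-2, -1) / q.size(-1) ** 0.5, dim=-1)
        attn = attn + self.extra               # enhance with the learnable matrix
        attn = attn / attn.abs().sum(dim=-1, keepdim=True).clamp_min(1e-8)  # row-wise L1 norm
        return self.proj(attn @ v)

# Toy usage: 4 variates treated as tokens, embedding dim 32.
layer = EnhancedAttention(dim=32, n_tokens=4)
print(layer(torch.randn(2, 4, 32)).shape)      # torch.Size([2, 4, 32])
```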
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09460">arXiv:2501.09460</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.09460">pdf</a>, <a href="https://arxiv.org/format/2501.09460">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Normal-NeRF: Ambiguity-Robust Normal Estimation for Highly Reflective Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+J">Ji Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Ying%2C+X">Xianghua Ying</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruohao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+B">Bowei Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Yue%2C+W">Wenzhen Yue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09460v1-abstract-short" style="display: inline;"> Neural Radiance Fields (NeRF) often struggle with reconstructing and rendering highly reflective scenes. Recent advancements have developed various reflection-aware appearance models to enhance NeRF&#39;s capability to render specular reflections. However, the robust reconstruction of highly reflective scenes is still hindered by the inherent shape ambiguity on specular surfaces. Existing methods typi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09460v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09460v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09460v1-abstract-full" style="display: none;"> Neural Radiance Fields (NeRF) often struggle with reconstructing and rendering highly reflective scenes. Recent advancements have developed various reflection-aware appearance models to enhance NeRF&#39;s capability to render specular reflections. However, the robust reconstruction of highly reflective scenes is still hindered by the inherent shape ambiguity on specular surfaces. Existing methods typically rely on additional geometry priors to regularize the shape prediction, but this can lead to oversmoothed geometry in complex scenes. Observing the critical role of surface normals in parameterizing reflections, we introduce a transmittance-gradient-based normal estimation technique that remains robust even under ambiguous shape conditions. Furthermore, we propose a dual activated densities module that effectively bridges the gap between smooth surface normals and sharp object boundaries. Combined with a reflection-aware appearance model, our proposed method achieves robust reconstruction and high-fidelity rendering of scenes featuring both highly specular reflections and intricate geometric structures. Extensive experiments demonstrate that our method outperforms existing state-of-the-art methods on various datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09460v1-abstract-full').style.display = 'none'; document.getElementById('2501.09460v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2025, code available at https://github.com/sjj118/Normal-NeRF</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08067">arXiv:2501.08067</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.08067">pdf</a>, <a href="https://arxiv.org/format/2501.08067">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimal Policy Adaptation under Covariate Shift </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xueqing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Q">Qinwei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Z">Zhaoqing Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruocheng Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+P">Peng Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08067v1-abstract-short" style="display: inline;"> Transfer learning of prediction models has been extensively studied, while the corresponding policy learning approaches are rarely discussed. In this paper, we propose principled approaches for learning the optimal policy in the target domain by leveraging two datasets: one with full information from the source domain and the other from the target domain with only covariates. First, under the sett&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08067v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08067v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08067v1-abstract-full" style="display: none;"> Transfer learning of prediction models has been extensively studied, while the corresponding policy learning approaches are rarely discussed. In this paper, we propose principled approaches for learning the optimal policy in the target domain by leveraging two datasets: one with full information from the source domain and the other from the target domain with only covariates. First, under the setting of covariate shift, we formulate the problem from a perspective of causality and present the identifiability assumptions for the reward induced by a given policy. Then, we derive the efficient influence function and the semiparametric efficiency bound for the reward. Based on this, we construct a doubly robust and semiparametric efficient estimator for the reward and then learn the optimal policy by optimizing the estimated reward. 
Moreover, we theoretically analyze the bias and the generalization error bound for the learned policy. Furthermore, in the presence of both covariate and concept shifts, we propose a novel sensitivity analysis method to evaluate the robustness of the proposed policy learning approach. Extensive experiments demonstrate that the approach not only estimates the reward more accurately but also yields a policy that closely approximates the theoretically optimal policy.
Submitted 14 January, 2025; originally announced January 2025.
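The doubly robust construction can be illustrated with a generic AIPW-style policy-value estimator reweighted toward the target covariate distribution. Everything below (binary actions, the nuisance models, the density-ratio function, and the names) is an illustrative assumption, not the paper's influence-function-based estimator.

```python
import numpy as np

def dr_policy_value(X, A, Y, policy, q_model, propensity, density_ratio):
    """Doubly robust (AIPW-style) estimate of a policy's value on the target
    covariate distribution, using source data (X, A, Y) with binary actions.

    policy(x)        -> action the evaluated policy would take
    q_model(x, a)    -> estimated outcome E[Y | X=x, A=a]
    propensity(x, a) -> estimated P(A=a | X=x) in the source domain
    density_ratio(x) -> estimated p_target(x) / p_source(x)
    """
    pi_a = np.array([policy(x) for x in X])
    q_pi = np.array([q_model(x, a) for x, a in zip(X, pi_a)])
    q_obs = np.array([q_model(x, a) for x, a in zip(X, A)])
    e_obs = np.array([propensity(x, a) for x, a in zip(X, A)])
    w = np.array([density_ratio(x) for x in X])
    # Outcome-model term plus an inverse-propensity correction on matched actions.
    correction = (A == pi_a) / e_obs * (Y - q_obs)
    return np.mean(w * (q_pi + correction))

# Toy usage with hand-coded nuisances (purely illustrative).
rng = np.random.default_rng(0)
X = rng.normal(size=500)
A = rng.integers(0, 2, size=500)
Y = X * A + rng.normal(size=500)
v = dr_policy_value(X, A, Y,
                    policy=lambda x: int(x > 0),
                    q_model=lambda x, a: x * a,
                    propensity=lambda x, a: 0.5,
                    density_ratio=lambda x: 1.0)
print(v)   # roughly E[max(X, 0)] ~ 0.4 for this toy model
```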

8. arXiv:2501.05075 [pdf, other]  cs.AI; cs.LG
A Text-Based Knowledge-Embedded Soft Sensing Modeling Approach for General Industrial Process Tasks Based on Large Language Model
Authors: Shuo Tong, Han Liu, Runyuan Guo, Xueqiong Tian, Wenqing Wang, Ding Liu, Youmin Zhang
Abstract: Data-driven soft sensors (DDSS) have become mainstream methods for predicting key performance indicators in process industries. However, DDSS development requires complex and costly customized designs tailored to various tasks during the modeling process. Moreover, DDSS are constrained to a single structured data modality, limiting their ability to incorporate additional contextual knowledge. Furthermore, the limited representation learning of DDSS leads to weak predictive performance with scarce data. To address these challenges, we propose a general framework named LLM-TKESS (large language model for text-based knowledge-embedded soft sensing), harnessing the powerful general problem-solving capabilities, cross-modal knowledge transfer abilities, and few-shot capabilities of LLMs for enhanced soft sensing modeling. Specifically, an auxiliary variable series encoder (AVS Encoder) is proposed to unleash the LLM's potential for capturing temporal relationships within series and spatial semantic relationships among auxiliary variables. Then, we propose a two-stage fine-tuning alignment strategy: in the first stage, parameter-efficient fine-tuning through autoregressive training adjusts the LLM to rapidly accommodate process variable data, resulting in a soft sensing foundation model (SSFM); subsequently, by training adapters, we adapt the SSFM to various downstream tasks without modifying its architecture. We then propose two text-based knowledge-embedded soft sensors, integrating new natural language modalities to overcome the limitations of purely structured data models. Furthermore, benefiting from the LLM's pre-existing world knowledge, our model demonstrates outstanding predictive capability in small-sample conditions. Using the thermal deformation of an air preheater rotor as a case study, we validate through extensive experiments that LLM-TKESS exhibits outstanding performance.
Submitted 9 January, 2025; originally announced January 2025.
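The adapter stage of the two-stage strategy can be pictured with a generic residual bottleneck adapter attached to a frozen backbone; the placeholder backbone, dimensions, and task head below are assumptions for illustration, not the paper's AVS Encoder or SSFM.

```python
import torch
import torch.nn as nn

class BottleneckAdapter(nn.Module):
    """Generic residual bottleneck adapter used to specialise a frozen
    foundation model to a downstream task (dimensions are assumptions)."""

    def __init__(self, dim, bottleneck=64):
        super().__init__()
        self.down = nn.Linear(dim, bottleneck)
        self.up = nn.Linear(bottleneck, dim)
        self.act = nn.GELU()

    def forward(self, h):
        return h + self.up(self.act(self.down(h)))  # residual keeps the base features intact

# Freeze a placeholder backbone and train only the adapter plus a task head.
backbone = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=128, nhead=4, batch_first=True), num_layers=2)
for p in backbone.parameters():
    p.requires_grad = False
adapter, head = BottleneckAdapter(128), nn.Linear(128, 1)

x = torch.randn(8, 16, 128)                  # (batch, sequence of process variables, features)
y_hat = head(adapter(backbone(x))[:, -1])    # predict a soft-sensor target from the last step
print(y_hat.shape)                           # torch.Size([8, 1])
```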
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03295">arXiv:2501.03295</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.03295">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Soft Sensor Method with Uncertainty-Awareness and Self-Explanation Based on Large Language Models Enhanced by Domain Knowledge Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tong%2C+S">Shuo Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Han Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Runyuan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wenqing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+X">Xueqiong Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+L">Lingyun Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Huayong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+D">Ding Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Youmin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03295v2-abstract-short" style="display: inline;"> Data-driven soft sensors are crucial in predicting key performance indicators in industrial systems. However, current methods predominantly rely on the supervised learning paradigms of parameter updating, which inherently faces challenges such as high development costs, poor robustness, training instability, and lack of interpretability. Recently, large language models (LLMs) have demonstrated sig&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03295v2-abstract-full').style.display = 'inline'; document.getElementById('2501.03295v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03295v2-abstract-full" style="display: none;"> Data-driven soft sensors are crucial in predicting key performance indicators in industrial systems. However, current methods predominantly rely on the supervised learning paradigms of parameter updating, which inherently faces challenges such as high development costs, poor robustness, training instability, and lack of interpretability. Recently, large language models (LLMs) have demonstrated significant potential across various domains, notably through In-Context Learning (ICL), which enables high-performance task execution with minimal input-label demonstrations and no prior training. This paper aims to replace supervised learning with the emerging ICL paradigm for soft sensor modeling to address existing challenges and explore new avenues for advancement. 
To achieve this, we propose a novel framework called the Few-shot Uncertainty-aware and self-Explaining Soft Sensor (LLM-FUESS), which includes a Zero-shot Auxiliary Variable Selector (LLM-ZAVS) and an Uncertainty-aware Few-shot Soft Sensor (LLM-UFSS). The LLM-ZAVS retrieves from an Industrial Knowledge Vector Storage to enhance the LLM's domain-specific knowledge, enabling zero-shot auxiliary variable selection. In the LLM-UFSS, we use text-based context demonstrations of structured data to prompt the LLM to perform ICL for prediction, and we propose a context-sample retrieval augmentation strategy to improve performance. Additionally, we exploit the AIGC and probabilistic characteristics of LLMs to propose self-explanation and uncertainty quantification methods for constructing a trustworthy soft sensor. Extensive experiments demonstrate that our method achieves state-of-the-art predictive performance, strong robustness, and flexibility, and effectively mitigates the training instability found in traditional methods. To the best of our knowledge, this is the first work to establish a soft sensor using LLMs.
Submitted 7 January, 2025; v1 submitted 6 January, 2025; originally announced January 2025.

10. arXiv:2412.18911 [pdf, other]  cs.LG; cs.AI; cs.CV
Accelerating Diffusion Transformers with Dual Feature Caching
Authors: Chang Zou, Evelyn Zhang, Runlin Guo, Haohang Xu, Conghui He, Xuming Hu, Linfeng Zhang
Abstract: Diffusion Transformers (DiT) have become the dominant method for image and video generation, yet they still incur substantial computational costs. As an effective approach to DiT acceleration, feature caching methods cache DiT features at previous timesteps and reuse them at subsequent timesteps, allowing the corresponding computation to be skipped. However, on the one hand, aggressively reusing all features cached from previous timesteps leads to a severe drop in generation quality. On the other hand, conservatively caching only the features of redundant layers or tokens while still computing the important ones preserves generation quality but reduces the acceleration ratio. Observing this trade-off between generation quality and acceleration, this paper begins by quantitatively studying the error accumulated from cached features. Surprisingly, we find that aggressive caching does not introduce significantly more caching error at the caching step, and that conservative feature caching can fix the error introduced by aggressive caching. We therefore propose a dual caching strategy that applies aggressive and conservative caching iteratively, achieving significant acceleration and high generation quality at the same time. In addition, we introduce a V-caching strategy for token-wise conservative caching, which is compatible with flash attention and requires no training or calibration data. Our code has been released at https://github.com/Shenyi-Z/DuCa.
Submitted 25 December, 2024; originally announced December 2024.
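The aggressive/conservative alternation can be sketched with a simple caching wrapper around one block. The wrapped block is assumed to act token-wise (for example, the MLP of a DiT block), the token-importance criterion and mode schedule below are made up for illustration, and none of this reproduces the paper's DuCa or V-caching design.

```python
import torch
import torch.nn as nn

class CachedTokenwiseBlock(nn.Module):
    """Feature-caching wrapper in the spirit of dual caching: 'full' recomputes
    and refreshes the cache, 'aggressive' reuses the whole cached output, and
    'conservative' recomputes only a subset of tokens (chosen here by cached
    activation norm, an assumed criterion)."""

    def __init__(self, block, keep_ratio=0.3):
        super().__init__()
        self.block = block
        self.keep_ratio = keep_ratio
        self.cache = None

    def forward(self, x, mode="full"):               # x: (batch, tokens, dim)
        if mode == "aggressive" and self.cache is not None:
            return self.cache                         # reuse all cached features
        if mode == "conservative" and self.cache is not None:
            k = max(1, int(self.keep_ratio * x.size(1)))
            idx = self.cache.norm(dim=-1).topk(k, dim=1).indices      # (batch, k)
            gather_idx = idx.unsqueeze(-1).expand(-1, -1, x.size(-1))
            fresh = self.block(torch.gather(x, 1, gather_idx))        # recompute important tokens only
            out = self.cache.clone().scatter_(1, gather_idx, fresh)
            self.cache = out
            return out
        out = self.block(x)                           # full step: compute and refresh cache
        self.cache = out
        return out

# Toy usage: alternate full / conservative / aggressive steps across timesteps.
blk = CachedTokenwiseBlock(nn.Sequential(nn.Linear(64, 256), nn.GELU(), nn.Linear(256, 64)))
x = torch.randn(2, 16, 64)
for mode in ["full", "conservative", "aggressive", "conservative"]:
    y = blk(x, mode=mode)
print(y.shape)   # torch.Size([2, 16, 64])
```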
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18100">arXiv:2412.18100</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.18100">pdf</a>, <a href="https://arxiv.org/format/2412.18100">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EvoPat: A Multi-LLM-based Patents Summarization and Analysis Agent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Suyuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+X">Xueqian Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Menghao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruofeng Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Nan%2C+K">Kai Nan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18100v1-abstract-short" style="display: inline;"> The rapid growth of scientific techniques and knowledge is reflected in the exponential increase in new patents filed annually. While these patents drive innovation, they also present significant burden for researchers and engineers, especially newcomers. To avoid the tedious work of navigating a vast and complex landscape to identify trends and breakthroughs, researchers urgently need efficient t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18100v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18100v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18100v1-abstract-full" style="display: none;"> The rapid growth of scientific techniques and knowledge is reflected in the exponential increase in new patents filed annually. While these patents drive innovation, they also present significant burden for researchers and engineers, especially newcomers. To avoid the tedious work of navigating a vast and complex landscape to identify trends and breakthroughs, researchers urgently need efficient tools to summarize, evaluate, and contextualize patents, revealing their innovative contributions and underlying scientific principles.To address this need, we present EvoPat, a multi-LLM-based patent agent designed to assist users in analyzing patents through Retrieval-Augmented Generation (RAG) and advanced search strategies. EvoPat leverages multiple Large Language Models (LLMs), each performing specialized roles such as planning, identifying innovations, and conducting comparative evaluations. The system integrates data from local databases, including patents, literature, product catalogous, and company repositories, and online searches to provide up-to-date insights. The ability to collect information not included in original database automatically is also implemented. Through extensive testing in the natural language processing (NLP) domain, we demonstrate that EvoPat outperforms GPT-4 in tasks such as patent summarization, comparative analysis, and technical evaluation. 
EvoPat represents a significant step toward creating AI-powered tools that empower researchers and engineers to efficiently navigate the complexities of the patent landscape.
Submitted 23 December, 2024; originally announced December 2024.
Comments: 15 pages, 2 figures, 8 tables
MSC Class: I.2.1
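As a rough illustration of the multi-role orchestration described above (not EvoPat's actual code), the sketch below wires a planner, an innovation analyst, and a comparator around a generic `llm(prompt)` callable and a hypothetical `retrieve(query)` function standing in for the RAG and online-search components; the role prompts are invented.

```python
# Hypothetical multi-role patent-analysis pipeline; role prompts, retrieve(), and
# llm() are placeholders, not EvoPat's real interfaces.

ROLE_PROMPTS = {
    "planner": "Break the user's question about this patent into analysis steps.",
    "innovator": "Identify the novel contributions of the patent given this context.",
    "comparator": "Compare the patent against the retrieved prior art.",
}

def analyze_patent(patent_text, question, llm, retrieve):
    """llm: callable(str) -> str; retrieve: callable(str) -> list[str]."""
    plan = llm(f"{ROLE_PROMPTS['planner']}\nQuestion: {question}")
    context = "\n".join(retrieve(question) + retrieve(patent_text[:2000]))
    novelty = llm(f"{ROLE_PROMPTS['innovator']}\nPatent: {patent_text}\nContext: {context}")
    comparison = llm(f"{ROLE_PROMPTS['comparator']}\nFindings: {novelty}\nPrior art: {context}")
    return {"plan": plan, "novelty": novelty, "comparison": comparison}
```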
arXiv:2412.13111 (https://arxiv.org/abs/2412.13111) [cs.CV (Computer Vision and Pattern Recognition), cs.GR (Graphics)]
Motion-2-to-3: Leveraging 2D Motion Data to Boost 3D Motion Generation
Authors: Huaijin Pi, Ruoxi Guo, Zehong Shen, Qing Shuai, Zechen Hu, Zhumei Wang, Yajiao Dong, Ruizhen Hu, Taku Komura, Sida Peng, Xiaowei Zhou
Abstract: Text-driven human motion synthesis is capturing significant attention for its ability to effortlessly generate intricate movements from abstract text cues, showcasing its potential for revolutionizing motion design not only in film narratives but also in virtual reality experiences and computer game development. Existing methods often rely on 3D motion capture data, which require special setups, resulting in higher costs for data acquisition and ultimately limiting the diversity and scope of human motion. In contrast, 2D human videos offer a vast and accessible source of motion data, covering a wider range of styles and activities. In this paper, we explore leveraging 2D human motion extracted from videos as an alternative data source to improve text-driven 3D motion generation. Our approach introduces a novel framework that disentangles local joint motion from global movements, enabling efficient learning of local motion priors from 2D data. We first train a single-view 2D local motion generator on a large dataset of text-motion pairs. To enhance this model to synthesize 3D motion, we fine-tune the generator with 3D data, transforming it into a multi-view generator that predicts view-consistent local joint motion and root dynamics. Experiments on the HumanML3D dataset and novel text prompts demonstrate that our method efficiently utilizes 2D data, supporting realistic 3D human motion generation and broadening the range of motion types it supports. Our code will be made publicly available at https://zju3dv.github.io/Motion-2-to-3/.
Submitted 17 December, 2024; originally announced December 2024.
Comments: Project page: https://zju3dv.github.io/Motion-2-to-3/

arXiv:2412.11934 (https://arxiv.org/abs/2412.11934) [cs.AI (Artificial Intelligence)]
Stepwise Reasoning Error Disruption Attack of LLMs
Authors: Jingyu Peng, Maolin Wang, Xiangyu Zhao, Kai Zhang, Wanyu Wang, Pengyue Jia, Qidong Liu, Ruocheng Guo, Qi Liu
Abstract: Large language models (LLMs) have made remarkable strides in complex reasoning tasks, but their safety and robustness in reasoning processes remain underexplored. Existing attacks on LLM reasoning are constrained by specific settings or lack of imperceptibility, limiting their feasibility and generalizability. To address these challenges, we propose the Stepwise rEasoning Error Disruption (SEED) attack, which subtly injects errors into prior reasoning steps to mislead the model into producing incorrect subsequent reasoning and final answers. Unlike previous methods, SEED is compatible with zero-shot and few-shot settings, maintains the natural reasoning flow, and ensures covert execution without modifying the instruction. Extensive experiments on four datasets across four different models demonstrate SEED's effectiveness, revealing the vulnerabilities of LLMs to disruptions in reasoning processes. These findings underscore the need for greater attention to the robustness of LLM reasoning to ensure safety in practical applications.
Submitted 23 December, 2024; v1 submitted 16 December, 2024; originally announced December 2024.
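Purely as a toy illustration of the general idea of corrupting an intermediate reasoning step (not the SEED method itself), the sketch below flips one digit in a chain-of-thought trace before asking a generic `llm()` callable to continue from the corrupted context; the perturbation rule and prompt format are assumptions.

```python
import random
import re

def perturb_reasoning_step(cot_steps, rng=random.Random(0)):
    """Flip one digit in a randomly chosen intermediate step of a chain of thought.

    cot_steps: list of strings (reasoning steps). Returns a perturbed copy.
    Toy illustration only; not the SEED attack's actual perturbation strategy.
    """
    steps = list(cot_steps)
    candidates = [i for i, s in enumerate(steps[:-1]) if re.search(r"\d", s)]
    if not candidates:
        return steps
    i = rng.choice(candidates)
    digit = re.search(r"\d", steps[i]).group()
    new_digit = str((int(digit) + rng.randint(1, 9)) % 10)   # always a different digit
    steps[i] = steps[i].replace(digit, new_digit, 1)
    return steps

def continue_from_perturbed(llm, question, cot_steps):
    """llm: callable(str) -> str. Feeds the corrupted partial trace back to the model."""
    corrupted = perturb_reasoning_step(cot_steps)
    prompt = question + "\n" + "\n".join(corrupted) + "\nContinue the reasoning and give the final answer:"
    return llm(prompt)
```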
arXiv:2412.09950 (https://arxiv.org/abs/2412.09950) [cs.IR (Information Retrieval)]
Hesitation and Tolerance in Recommender Systems
Authors: Kuan Zou, Aixin Sun, Xuemeng Jiang, Yitong Ji, Hao Zhang, Jing Wang, Ruijie Guo
Abstract: User interactions in recommender systems are inherently complex, often involving behaviors that go beyond simple acceptance or rejection. One particularly common behavior is hesitation, where users deliberate over recommended items, signaling uncertainty. Our large-scale surveys, with 6,644 and 3,864 responses respectively, confirm that hesitation is not only widespread but also has a profound impact on user experiences. When users spend additional time engaging with content they are ultimately uninterested in, this can lead to negative emotions, a phenomenon we term tolerance. The surveys reveal that such tolerance behaviors often arise after hesitation and can erode trust, satisfaction, and long-term loyalty to the platform. For instance, a click might reflect a need for more information rather than genuine interest, and prolonged exposure to unsuitable content amplifies frustration. This misalignment between user intent and system interpretation introduces noise into recommendation training, resulting in suggestions that increase uncertainty and disengagement. To address these issues, we identified signals indicative of tolerance behavior and analyzed datasets from both e-commerce and short-video platforms. The analysis shows a strong correlation between increased tolerance behavior and decreased user activity. We integrated these insights into the training process of a recommender system for a major short-video platform. Results from four independent online A/B experiments demonstrated significant improvements in user retention, achieved with minimal additional computational costs. These findings underscore the importance of recognizing hesitation as a ubiquitous user behavior and addressing tolerance to enhance satisfaction, build trust, and sustain long-term engagement in recommender systems.
Submitted 13 December, 2024; originally announced December 2024.
Comments: 30 pages, 6 figures, 6 tables
arXiv:2411.18463 (https://arxiv.org/abs/2411.18463) [q-bio.BM (Biomolecules), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)]
Hotspot-Driven Peptide Design via Multi-Fragment Autoregressive Extension
Authors: Jiahan Li, Tong Chen, Shitong Luo, Chaoran Cheng, Jiaqi Guan, Ruihan Guo, Sheng Wang, Ge Liu, Jian Peng, Jianzhu Ma
Abstract: Peptides, short chains of amino acids, interact with target proteins, making them a unique class of protein-based therapeutics for treating human diseases. Recently, deep generative models have shown great promise in peptide generation. However, several challenges remain in designing effective peptide binders. First, not all residues contribute equally to peptide-target interactions. Second, the generated peptides must adopt valid geometries due to the constraints of peptide bonds. Third, realistic tasks for peptide drug development are still lacking. To address these challenges, we introduce PepHAR, a hot-spot-driven autoregressive generative model for designing peptides targeting specific proteins. Building on the observation that certain hot spot residues have higher interaction potentials, we first use an energy-based density model to fit and sample these key residues. Next, to ensure proper peptide geometry, we autoregressively extend peptide fragments by estimating dihedral angles between residue frames. Finally, we apply an optimization process to iteratively refine fragment assembly, ensuring correct peptide structures. By combining hot spot sampling with fragment-based extension, our approach enables de novo peptide design tailored to a target protein and allows the incorporation of key hot spot residues into peptide scaffolds. Extensive experiments, including peptide design and peptide scaffold generation, demonstrate the strong potential of PepHAR in computational peptide binder design.
Submitted 26 November, 2024; originally announced November 2024.
Comments: Preprint, Under review
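To give a flavor of the two stages sketched above, sampling anchors from an energy-based density and then growing a chain by angles, here is a deliberately simplified 2D toy; the energies, the Boltzmann-style sampling, and the turning-angle geometry are invented for illustration and bear no relation to PepHAR's actual parameterization (which works with 3D dihedral angles between residue frames).

```python
import math
import random

def sample_hotspots(energies, k=2, rng=random.Random(0)):
    """Sample k positions with probability proportional to exp(-energy) (a Boltzmann-style toy)."""
    weights = [math.exp(-e) for e in energies]
    return rng.choices(range(len(energies)), weights=weights, k=k)

def extend_chain_2d(start_xy, turn_angles, bond_length=1.0):
    """Grow a 2D chain from start_xy by successive turning angles (radians).

    A stand-in for "autoregressive fragment extension by angles"; real peptide
    geometry is 3D and far more constrained.
    """
    points = [start_xy]
    heading = 0.0
    for angle in turn_angles:
        heading += angle
        x, y = points[-1]
        points.append((x + bond_length * math.cos(heading),
                       y + bond_length * math.sin(heading)))
    return points

# Example: pick 2 anchor positions from toy per-residue energies, then extend a fragment.
anchors = sample_hotspots(energies=[1.5, 0.2, 0.9, 2.0], k=2)
fragment = extend_chain_2d((0.0, 0.0), turn_angles=[0.4, -0.3, 0.1])
```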
arXiv:2411.12364 (https://arxiv.org/abs/2411.12364) [cs.LG (Machine Learning)]
Ultra-Sparse Memory Network
Authors: Zihao Huang, Qiyang Min, Hongzhi Huang, Defa Zhu, Yutao Zeng, Ran Guo, Xun Zhou
Abstract: It is widely acknowledged that the performance of Transformer models is logarithmically related to their number of parameters and computational complexity. While approaches like Mixture of Experts (MoE) decouple parameter count from computational complexity, they still face challenges in inference due to high memory access costs. This work introduces UltraMem, which incorporates a large-scale, ultra-sparse memory layer to address these limitations. Our approach significantly reduces inference latency while maintaining model performance. We also investigate the scaling laws of this new architecture, demonstrating that it not only exhibits favorable scaling properties but also outperforms MoE. In experiments, the largest UltraMem we train has 20 million memory slots. The results show that our method achieves state-of-the-art inference speed and model performance within a given computational budget, paving the way for billions of slots or experts.
Submitted 6 February, 2025; v1 submitted 19 November, 2024; originally announced November 2024.
Comments: Published as a conference paper at ICLR 2025
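As background for the idea of a very wide but sparsely accessed memory layer (a generic sketch, not the UltraMem architecture), the snippet below reads only the top-k slots of a large key-value memory for each query; the dimensions, scoring rule, and slot count are arbitrary choices.

```python
import numpy as np

def sparse_memory_lookup(query, keys, values, k=8):
    """Read from a wide memory by attending to only the top-k matching slots.

    query: (d,), keys: (n_slots, d), values: (n_slots, d_v). A generic sparse-memory
    illustration; not the UltraMem design itself.
    """
    scores = keys @ query                      # similarity of the query to every slot
    top = np.argpartition(scores, -k)[-k:]     # indices of the k best-matching slots
    weights = np.exp(scores[top] - scores[top].max())
    weights /= weights.sum()                   # softmax over the selected slots only
    return weights @ values[top]               # the weighted read touches just k rows

# Toy memory with 100,000 slots; only 8 are read per query.
rng = np.random.default_rng(0)
keys = rng.standard_normal((100_000, 32)).astype(np.float32)
values = rng.standard_normal((100_000, 64)).astype(np.float32)
out = sparse_memory_lookup(rng.standard_normal(32).astype(np.float32), keys, values)
```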
arXiv:2411.11916 (https://arxiv.org/abs/2411.11916) [cs.DB (Databases)]
From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing
Authors: Jingxuan Wei, Cheng Tan, Qi Chen, Gaowei Wu, Siyuan Li, Zhangyang Gao, Linzhuang Sun, Bihui Yu, Ruifeng Guo
Abstract: We introduce the task of text-to-diagram generation, which focuses on creating structured visual representations directly from textual descriptions. Existing approaches in text-to-image and text-to-code generation lack the logical organization and flexibility needed to produce accurate, editable diagrams, often resulting in outputs that are either unstructured or difficult to modify. To address this gap, we introduce DiagramGenBenchmark, a comprehensive evaluation framework encompassing eight distinct diagram categories, including flowcharts, model architecture diagrams, and mind maps. Additionally, we present DiagramAgent, an innovative framework with four core modules (Plan Agent, Code Agent, Check Agent, and Diagram-to-Code Agent) designed to facilitate both the generation and refinement of complex diagrams. Our extensive experiments, which combine objective metrics with human evaluations, demonstrate that DiagramAgent significantly outperforms existing baseline models in terms of accuracy, structural coherence, and modifiability. This work not only establishes a foundational benchmark for the text-to-diagram generation task but also introduces a powerful toolset to advance research and applications in this emerging area.
Submitted 17 November, 2024; originally announced November 2024.
arXiv:2411.11278 (https://arxiv.org/abs/2411.11278) [cs.CV (Computer Vision and Pattern Recognition), cs.MM (Multimedia)]
Towards Open-Vocabulary Audio-Visual Event Localization
Authors: Jinxing Zhou, Dan Guo, Ruohao Guo, Yuxin Mao, Jingjing Hu, Yiran Zhong, Xiaojun Chang, Meng Wang
Abstract: The Audio-Visual Event Localization (AVEL) task aims to temporally locate and classify video events that are both audible and visible. Most research in this field assumes a closed-set setting, which restricts these models' ability to handle test data containing event categories absent (unseen) during training. Recently, a few studies have explored AVEL in an open-set setting, enabling the recognition of unseen events as "unknown", but without providing category-specific semantics. In this paper, we advance the field by introducing the Open-Vocabulary Audio-Visual Event Localization (OV-AVEL) problem, which requires localizing audio-visual events and predicting explicit categories for both seen and unseen data at inference. To address this new task, we propose the OV-AVEBench dataset, comprising 24,800 videos across 67 real-life audio-visual scenes (seen:unseen = 46:21), each with manual segment-level annotation. We also establish three evaluation metrics for this task. Moreover, we investigate two baseline approaches, one training-free and one using a further fine-tuning paradigm. Specifically, we utilize the unified multimodal space from the pretrained ImageBind model to extract audio, visual, and textual (event classes) features. The training-free baseline then determines predictions by comparing the consistency of audio-text and visual-text feature similarities. The fine-tuning baseline incorporates lightweight temporal layers to encode temporal relations within the audio and visual modalities, using OV-AVEBench training data for model fine-tuning. We evaluate these baselines on the proposed OV-AVEBench dataset and discuss potential directions for future work in this new field.
Submitted 17 November, 2024; originally announced November 2024.
Comments: Project page: https://github.com/jasongief/OV-AVEL
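The training-free baseline described above lends itself to a short sketch: given per-segment audio and visual embeddings plus text embeddings of the class names (all assumed to live in one shared space, as ImageBind provides), pick for each segment the class whose audio-text and visual-text similarities agree best. The scoring rule here (summing the two cosine similarities) is a guess for illustration, not necessarily the paper's exact criterion, and random vectors stand in for real features.

```python
import numpy as np

def l2_normalize(x, axis=-1):
    return x / (np.linalg.norm(x, axis=axis, keepdims=True) + 1e-8)

def training_free_predict(audio_emb, visual_emb, text_emb):
    """audio_emb, visual_emb: (T, d) per-segment features; text_emb: (C, d) class-name features.

    Returns, for each segment, the class index with the highest combined
    audio-text + visual-text cosine similarity. Illustrative scoring only.
    """
    a, v, t = map(l2_normalize, (audio_emb, visual_emb, text_emb))
    audio_text = a @ t.T          # (T, C) cosine similarities
    visual_text = v @ t.T         # (T, C)
    combined = audio_text + visual_text
    return combined.argmax(axis=1), combined

# Toy usage with random features standing in for ImageBind embeddings.
rng = np.random.default_rng(0)
labels, scores = training_free_predict(rng.standard_normal((10, 512)),
                                        rng.standard_normal((10, 512)),
                                        rng.standard_normal((67, 512)))
```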
arXiv:2411.01075 (https://arxiv.org/abs/2411.01075) [cs.DC (Distributed, Parallel, and Cluster Computing)]
Cephalo: Harnessing Heterogeneous GPU Clusters for Training Transformer Models
Authors: Runsheng Benson Guo, Utkarsh Anand, Arthur Chen, Khuzaima Daudjee
Abstract: Training transformer models requires substantial GPU compute and memory resources. In homogeneous clusters, distributed strategies allocate resources evenly, but this approach is inefficient for heterogeneous clusters, where GPUs differ in power and memory. As high-end GPUs are costly and limited in availability, heterogeneous clusters with diverse GPU types are becoming more common. Existing methods attempt to balance compute across GPUs based on capacity but often underutilize compute due to memory constraints. We present Cephalo, a system that optimizes compute and memory usage by decoupling compute distribution from training state assignment. Cephalo outperforms state-of-the-art methods by achieving significantly higher training throughput while supporting larger models and batch sizes.
Submitted 14 November, 2024; v1 submitted 1 November, 2024; originally announced November 2024.
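To make the load-balancing intuition concrete (a back-of-the-envelope toy, not Cephalo's optimizer), the helper below splits a global batch across GPUs in proportion to per-GPU throughput and then caps each share by a rough per-GPU memory limit; the throughput and memory numbers in the example are invented.

```python
def split_batch(global_batch, throughputs, mem_caps):
    """Allocate a global batch across heterogeneous GPUs.

    throughputs: relative samples/sec each GPU can process; mem_caps: max per-GPU
    batch the memory allows. Proportional split with a memory cap; leftover samples
    go to the fastest GPUs that still have headroom. Toy heuristic for illustration.
    """
    total = sum(throughputs)
    shares = [min(cap, int(global_batch * t / total))
              for t, cap in zip(throughputs, mem_caps)]
    leftover = global_batch - sum(shares)
    for i in sorted(range(len(shares)), key=lambda i: throughputs[i], reverse=True):
        if leftover <= 0:
            break
        take = min(mem_caps[i] - shares[i], leftover)
        shares[i] += take
        leftover -= take
    return shares

# Example: one fast card, one mid-range card, and one small card (made-up numbers).
print(split_batch(96, throughputs=[300, 150, 60], mem_caps=[64, 32, 12]))  # [57, 28, 11]
```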
arXiv:2411.00893 (https://arxiv.org/abs/2411.00893) [eess.IV (Image and Video Processing), cs.CV (Computer Vision and Pattern Recognition), cs.IT (Information Theory), eess.SP (Signal Processing)]
Blind Time-of-Flight Imaging: Sparse Deconvolution on the Continuum with Unknown Kernels
Authors: Ruiming Guo, Ayush Bhandari
Abstract: In recent years, computational Time-of-Flight (ToF) imaging has emerged as an exciting and novel imaging modality that offers new and powerful interpretations of natural scenes, with applications extending to 3D, light-in-flight, and non-line-of-sight imaging. Mathematically, ToF imaging relies on algorithmic super-resolution, as the back-scattered sparse light echoes lie on a finer time resolution than what digital devices can capture. Traditional methods necessitate knowledge of the emitted light pulses or kernels and employ sparse deconvolution to recover scenes. Unlike previous approaches, this paper introduces a novel, blind ToF imaging technique that does not require kernel calibration and recovers sparse spikes on a continuum, rather than on a discrete grid. By studying the shared characteristics of various ToF modalities, we capitalize on the fact that most physical pulses approximately satisfy the Strang-Fix conditions from approximation theory. This leads to a new mathematical formulation for sparse super-resolution. Our recovery approach uses an optimization method pivoted on an alternating minimization strategy. We benchmark our blind ToF method against traditional kernel calibration methods, which serve as the baseline.
Extensive hardware experiments across different ToF modalities demonstrate the algorithmic advantages, flexibility, and empirical robustness of our approach. We show that our work facilitates super-resolution in scenarios where distinguishing between closely spaced objects is challenging, while maintaining performance comparable to known-kernel situations. Examples of light-in-flight imaging and light-sweep videos highlight the practical benefits of our blind super-resolution method in enhancing the understanding of natural scenes.
Submitted 31 October, 2024; originally announced November 2024.
Comments: 27 pages
MSC Class: 62H35; 68U99; 78A46; 94A12
Journal ref: SIAM Journal on Imaging Sciences, October 2024
arXiv:2410.16132 (https://arxiv.org/abs/2410.16132) [cs.AI (Artificial Intelligence)]
A Data-driven Crowd Simulation Framework Integrating Physics-informed Machine Learning with Navigation Potential Fields
Authors: Runkang Guo, Bin Chen, Qi Zhang, Yong Zhao, Xiao Wang, Zhengqiu Zhu
Abstract: Traditional rule-based physical models are limited by their reliance on singular physical formulas and parameters, making it difficult to effectively tackle the intricate tasks associated with crowd simulation. Recent research has introduced deep learning methods to tackle these issues, but most current approaches focus primarily on generating pedestrian trajectories, often lacking interpretability and failing to provide real-time dynamic simulations. To address these issues, we propose a novel data-driven crowd simulation framework that integrates Physics-informed Machine Learning (PIML) with navigation potential fields. Our approach leverages the strengths of both physical models and PIML. Specifically, we design an innovative Physics-informed Spatio-temporal Graph Convolutional Network (PI-STGCN) as a data-driven module to predict pedestrian movement trends from crowd spatio-temporal data. Additionally, we construct a physical model of navigation potential fields based on flow field theory to guide pedestrian movements, thereby reinforcing physical constraints during the simulation. In our framework, navigation potential fields are dynamically computed and updated based on the movement trends predicted by the PI-STGCN, while the updated crowd dynamics, guided by these fields, subsequently feed back into the PI-STGCN. Comparative experiments on two publicly available large-scale real-world datasets across five scenes demonstrate that our proposed framework outperforms existing rule-based methods in accuracy and fidelity. The similarity between simulated and actual pedestrian trajectories increases by 10.8%, while the average error is reduced by 4%. Moreover, our framework exhibits greater adaptability and better interpretability than methods that rely solely on deep learning for trajectory generation.
Submitted 21 October, 2024; originally announced October 2024.
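As a small illustration of agents being steered by a navigation potential field (a generic gradient-descent toy, unrelated to the PI-STGCN coupling described in the paper), the snippet moves pedestrians downhill on a potential that combines attraction to a goal with repulsion from an obstacle; all constants are arbitrary.

```python
import numpy as np

def potential(p, goal, obstacle, repulse=1.0):
    """Attractive quadratic well at the goal plus a repulsive bump at the obstacle."""
    return 0.5 * np.sum((p - goal) ** 2) + repulse / (np.linalg.norm(p - obstacle) + 1e-3)

def grad(p, goal, obstacle, eps=1e-4):
    """Numerical gradient of the potential (finite differences keep the toy short)."""
    g = np.zeros_like(p)
    for i in range(p.size):
        d = np.zeros_like(p)
        d[i] = eps
        g[i] = (potential(p + d, goal, obstacle) - potential(p - d, goal, obstacle)) / (2 * eps)
    return g

def step_agents(positions, goal, obstacle, lr=0.1):
    """Move every agent one step down the potential gradient."""
    return np.array([p - lr * grad(p, goal, obstacle) for p in positions])

agents = np.array([[0.0, 0.0], [0.5, 2.0]])
for _ in range(50):
    agents = step_agents(agents, goal=np.array([5.0, 5.0]), obstacle=np.array([2.5, 2.5]))
```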
arXiv:2410.00175 (https://arxiv.org/abs/2410.00175) [cs.CL (Computation and Language)]
Adaptable Moral Stances of Large Language Models on Sexist Content: Implications for Society and Gender Discourse
Authors: Rongchen Guo, Isar Nejadgholi, Hillary Dawkins, Kathleen C. Fraser, Svetlana Kiritchenko
Abstract: This work provides an explanatory view of how LLMs can apply moral reasoning to both criticize and defend sexist language. We assessed eight large language models, all of which demonstrated the capability to provide explanations grounded in varying moral perspectives for both critiquing and endorsing views that reflect sexist assumptions. With both human and automatic evaluation, we show that all eight models produce comprehensible and contextually relevant text, which is helpful in understanding diverse views on how sexism is perceived. Also, through analysis of moral foundations cited by LLMs in their arguments, we uncover the diverse ideological perspectives in models' outputs, with some models aligning more with progressive or conservative views on gender roles and sexism. Based on our observations, we caution against the potential misuse of LLMs to justify sexist language. We also highlight that LLMs can serve as tools for understanding the roots of sexist beliefs and designing well-informed interventions. Given this dual capacity, it is crucial to monitor LLMs and design safety mechanisms for their use in applications that involve sensitive societal topics, such as sexism.
Submitted 30 September, 2024; originally announced October 2024.
Comments: To be published at EMNLP 2024

arXiv:2410.00152 (https://arxiv.org/abs/2410.00152) [eess.IV (Image and Video Processing), cs.CV (Computer Vision and Pattern Recognition), cs.LG (Machine Learning), q-bio.QM (Quantitative Methods)]
Multimodal Alignment of Histopathological Images Using Cell Segmentation and Point Set Matching for Integrative Cancer Analysis
Authors: Jun Jiang, Raymond Moore, Brenna Novotny, Leo Liu, Zachary Fogarty, Ray Guo, Markovic Svetomir, Chen Wang
Abstract: Histopathological imaging is vital for cancer research and clinical practice, with multiplexed Immunofluorescence (MxIF) and Hematoxylin and Eosin (H&E) providing complementary insights. However, aligning different stains at the cell level remains a challenge due to modality differences.
In this paper, we present a novel framework for multimodal image alignment using cell segmentation outcomes. By treating cells as point sets, we apply Coherent Point Drift (CPD) for initial alignment and refine it with Graph Matching (GM). Evaluated on ovarian cancer tissue microarrays (TMAs), our method achieves high alignment accuracy, enabling integration of cell-level features across modalities and generating virtual H&E images from MxIF data for enhanced clinical interpretation.
Submitted 30 September, 2024; originally announced October 2024.
Comments: initial version
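To illustrate the point-set-alignment step in spirit, here is a tiny rigid alignment using the Kabsch/Procrustes solution on paired centroids; it is a simplified stand-in with known correspondences, not CPD (which estimates correspondences probabilistically) and not the paper's pipeline, and the simulated cell coordinates are arbitrary.

```python
import numpy as np

def rigid_align(source, target):
    """Least-squares rotation + translation mapping source points onto target points.

    source, target: (n, 2) arrays with known one-to-one correspondence (Kabsch/
    Procrustes). CPD, used in the paper, instead handles unknown correspondences.
    """
    mu_s, mu_t = source.mean(axis=0), target.mean(axis=0)
    s, t = source - mu_s, target - mu_t
    u, _, vt = np.linalg.svd(s.T @ t)
    d = np.sign(np.linalg.det(u @ vt))          # guard against reflections
    rot = u @ np.diag([1.0, d]) @ vt
    trans = mu_t - mu_s @ rot
    return rot, trans

# Toy check: recover a known rotation and shift of simulated cell centroids.
rng = np.random.default_rng(0)
cells = rng.uniform(0, 100, size=(200, 2))
theta = 0.3
true_rot = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
moved = cells @ true_rot + np.array([5.0, -3.0])
rot, trans = rigid_align(cells, moved)
print(np.allclose(cells @ rot + trans, moved, atol=1e-6))
```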
arXiv:2409.16472 (https://arxiv.org/abs/2409.16472) [cs.IT (Information Theory), eess.SP (Signal Processing)]
Sub-Nyquist USF Spectral Estimation: $K$ Frequencies with $6K + 4$ Modulo Samples
Authors: Ruiming Guo, Yuliang Zhu, Ayush Bhandari
Abstract: Digital acquisition of high bandwidth signals is particularly challenging when Nyquist rate sampling is impractical. This has led to extensive research in sub-Nyquist sampling methods, primarily for spectral and sinusoidal frequency estimation. However, these methods struggle with high-dynamic-range (HDR) signals that can saturate analog-to-digital converters (ADCs). Addressing this, we introduce a novel sub-Nyquist spectral estimation method, driven by the Unlimited Sensing Framework (USF) and utilizing a multi-channel system. The sub-Nyquist USF method aliases samples in both the amplitude and frequency domains, rendering the inverse problem particularly challenging. Towards this goal, our exact recovery theorem establishes that $K$ sinusoids of arbitrary amplitudes and frequencies can be recovered from $6K + 4$ modulo samples, remarkably, independent of the sampling rate or folding threshold. In the true spirit of sub-Nyquist sampling, via modulo ADC hardware experiments, we demonstrate successful spectrum estimation of HDR signals in the kHz range using Hz-range sampling rates (0.078% of the Nyquist rate). Our experiments also reveal up to a 33-fold improvement in frequency estimation accuracy using one less bit compared to conventional ADCs. These findings open new avenues in spectral estimation applications, e.g., radar, direction-of-arrival (DoA) estimation, and cognitive radio, showcasing the potential of USF.
Submitted 24 September, 2024; originally announced September 2024.
Comments: 18 pages, 8 figures, accepted to IEEE Transactions on Signal Processing
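For readers unfamiliar with modulo (folded) sampling, which underlies the Unlimited Sensing Framework, the snippet below shows the centered modulo nonlinearity folding a high-amplitude sinusoid into a fixed range [-lam, lam); it only illustrates the acquisition model with made-up numbers, not the paper's recovery algorithm.

```python
import numpy as np

def centered_modulo(x, lam):
    """Fold x into the interval [-lam, lam), as an idealized modulo ADC would."""
    return np.mod(x + lam, 2 * lam) - lam

# A high-dynamic-range tone folded by the modulo nonlinearity (toy parameters).
fs = 40.0                                      # sampling rate in Hz
t = np.arange(0, 1, 1 / fs)
signal = 10.0 * np.sin(2 * np.pi * 3.0 * t)    # amplitude 10 far exceeds the ADC range
folded = centered_modulo(signal, lam=1.0)      # every folded sample lies in [-1, 1)
print(folded.min() >= -1.0, folded.max() < 1.0)
```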
To mitigate the landslide risks, the Civil Engineering and Development Department (CEDD) has constructed steel flexible debris-resisting barriers on vulnerable natural catchments to protect residents. How&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13985v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13985v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13985v1-abstract-full" style="display: none;"> This work presents a LiDAR-based quadrotor system for slope inspection in dense vegetation environments. Cities like Hong Kong are vulnerable to climate hazards, which often result in landslides. To mitigate the landslide risks, the Civil Engineering and Development Department (CEDD) has constructed steel flexible debris-resisting barriers on vulnerable natural catchments to protect residents. However, it is necessary to carry out regular inspections to identify any anomalies, which may affect the proper functioning of the barriers. Traditional manual inspection methods face challenges and high costs due to steep terrain and dense vegetation. Compared to manual inspection, unmanned aerial vehicles (UAVs) equipped with LiDAR sensors and cameras have advantages such as maneuverability in complex terrain, and access to narrow areas and high spots. However, conducting slope inspections using UAVs in dense vegetation poses significant challenges. First, in terms of hardware, the overall design of the UAV must carefully consider its maneuverability in narrow spaces, flight time, and the types of onboard sensors required for effective inspection. Second, regarding software, navigation algorithms need to be designed to enable obstacle avoidance flight in dense vegetation environments. To overcome these challenges, we develop a LiDAR-based quadrotor, accompanied by a comprehensive software system. The goal is to deploy our quadrotor in field environments to achieve efficient slope inspection. To assess the feasibility of our hardware and software system, we conduct functional tests in non-operational scenarios. Subsequently, invited by CEDD, we deploy our quadrotor in six field environments, including five flexible debris-resisting barriers located in dense vegetation and one slope that experienced a landslide. These experiments demonstrated the superiority of our quadrotor in slope inspection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13985v1-abstract-full').style.display = 'none'; document.getElementById('2409.13985v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08476">arXiv:2409.08476</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.08476">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Research on Data Right Confirmation Mechanism of Federated Learning based on Blockchain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xiaogang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ren Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08476v1-abstract-short" style="display: inline;"> Federated learning can solve the privacy protection problem in distributed data mining and machine learning, and how to protect the ownership, use and income rights of all parties involved in federated learning is an important issue. This paper proposes a federated learning data ownership confirmation mechanism based on blockchain and smart contract, which uses decentralized blockchain technology&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08476v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08476v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08476v1-abstract-full" style="display: none;"> Federated learning can solve the privacy protection problem in distributed data mining and machine learning, and how to protect the ownership, use and income rights of all parties involved in federated learning is an important issue. This paper proposes a federated learning data ownership confirmation mechanism based on blockchain and smart contract, which uses decentralized blockchain technology to save the contribution of each participant on the blockchain, and distributes the benefits of federated learning results through the blockchain. In the local simulation environment of the blockchain, the relevant smart contracts and data structures are simulated and implemented, and the feasibility of the scheme is preliminarily demonstrated. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08476v1-abstract-full').style.display = 'none'; document.getElementById('2409.08476v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">in Chinese language</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07863">arXiv:2409.07863</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07863">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Collaboration Encouraging Quantum Secret Sharing Scheme with Seal Property </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xiaogang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ren Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07863v1-abstract-short" style="display: inline;"> A new concept of quantum secret sharing is introduced, in which collaboration among participants are encourage. And the dealer can ask the participants to send back their share and revoke the secret before a predefined date or event, i.e. so-called seal property. We also give two concrete constructions of CE-QSS-Seal (Collaboration-Encouraging Quantum Secret Sharing with Seal property) scheme. The&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07863v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07863v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07863v1-abstract-full" style="display: none;"> A new concept of quantum secret sharing is introduced, in which collaboration among participants are encourage. And the dealer can ask the participants to send back their share and revoke the secret before a predefined date or event, i.e. so-called seal property. We also give two concrete constructions of CE-QSS-Seal (Collaboration-Encouraging Quantum Secret Sharing with Seal property) scheme. The first one is unconditional secure and achieve the optimal bound of a seal scheme. The second one improve the optimal bound of seal by introducing post-quantum secure computational assumption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07863v1-abstract-full').style.display = 'none'; document.getElementById('2409.07863v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01887">arXiv:2409.01887</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.01887">pdf</a>, <a href="https://arxiv.org/format/2409.01887">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Detecting and Measuring Security Implications of Entangled Domain Verification in CDN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Ziyu Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhiwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Run Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jianjun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mingming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Ximeng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+T">Tianhao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Z">Zhuoran Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+R+H">Robert H. Deng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01887v1-abstract-short" style="display: inline;"> Content Delivery Networks (CDNs) offer a protection layer for enhancing the security of websites. However, a significant security flaw named Absence of Domain Verification (DVA) has become emerging recently. Although this threat is recognized, the current practices and security flaws of domain verification strategies in CDNs have not been thoroughly investigated. In this paper, we present DVAHunte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01887v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01887v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01887v1-abstract-full" style="display: none;"> Content Delivery Networks (CDNs) offer a protection layer for enhancing the security of websites. However, a significant security flaw named Absence of Domain Verification (DVA) has become emerging recently. Although this threat is recognized, the current practices and security flaws of domain verification strategies in CDNs have not been thoroughly investigated. In this paper, we present DVAHunter, an automated system for detecting DVA vulnerabilities that can lead to domain abuse in CDNs. Our evaluation of 45 major CDN providers reveals the prevalence of DVA: most (39/45) providers do not perform any verification, and even those that do remain exploitable. Additionally, we used DVAHunter to conduct a large-scale measurement of 89M subdomains from Tranco&#39;s Top 1M sites hosted on the 45 CDNs under evaluation. Our focus was on two primary DVA exploitation scenarios: covert communication and domain hijacking. We identified over 332K subdomains vulnerable to domain abuse. This tool provides deeper insights into DVA exploitation and allows us to propose viable mitigation practices for CDN providers. 
To date, we have received vulnerability confirmations from 12 providers; 6 (e.g., Edgio, Kuocai) have implemented fixes, and 1 (ChinaNetCenter) is actively working on solutions based on our recommendations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01887v1-abstract-full').style.display = 'none'; document.getElementById('2409.01887v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15663">arXiv:2408.15663</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.15663">pdf</a>, <a href="https://arxiv.org/format/2408.15663">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> NeuroVE: Brain-inspired Linear-Angular Velocity Estimation with Spiking Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xieyuanli Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruibin Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yujie Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Z">Zongtan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+F">Fangwen Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Huimin Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15663v1-abstract-short" style="display: inline;"> Vision-based ego-velocity estimation is a fundamental problem in robot state estimation. However, the constraints of frame-based cameras, including motion blur and insufficient frame rates in dynamic settings, readily lead to the failure of conventional velocity estimation techniques. Mammals exhibit a remarkable ability to accurately estimate their ego-velocity during aggressive movement. Hence,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15663v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15663v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15663v1-abstract-full" style="display: none;"> Vision-based ego-velocity estimation is a fundamental problem in robot state estimation. However, the constraints of frame-based cameras, including motion blur and insufficient frame rates in dynamic settings, readily lead to the failure of conventional velocity estimation techniques. Mammals exhibit a remarkable ability to accurately estimate their ego-velocity during aggressive movement. Hence, integrating this capability into robots shows great promise for addressing these challenges. 
In this paper, we propose a brain-inspired framework for linear-angular velocity estimation, dubbed NeuroVE. The NeuroVE framework employs an event camera to capture the motion information and implements spiking neural networks (SNNs) to simulate the brain&#39;s spatial cells&#39; function for velocity estimation. We formulate the velocity estimation as a time-series forecasting problem. To this end, we design an Astrocyte Leaky Integrate-and-Fire (ALIF) neuron model to encode continuous values. Additionally, we have developed an Astrocyte Spiking Long Short-term Memory (ASLSTM) structure, which significantly improves the time-series forecasting capabilities, enabling an accurate estimate of ego-velocity. Results from both simulation and real-world experiments indicate that NeuroVE has achieved an approximate 60% increase in accuracy compared to other SNN-based approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15663v1-abstract-full').style.display = 'none'; document.getElementById('2408.15663v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07916">arXiv:2408.07916</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.07916">pdf</a>, <a href="https://arxiv.org/ps/2408.07916">ps</a>, <a href="https://arxiv.org/format/2408.07916">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> GridSE: Towards Practical Secure Geographic Search via Prefix Symmetric Searchable Encryption (Full Version) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruoyang Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiarui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+S">Shucheng Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07916v1-abstract-short" style="display: inline;"> The proliferation of location-based services and applications has brought significant attention to data and location privacy. While general secure computation and privacy-enhancing techniques can partially address this problem, one outstanding challenge is to provide near latency-free search and compatibility with mainstream geographic search techniques, especially the Discrete Global Grid Systems&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07916v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07916v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07916v1-abstract-full" style="display: none;"> The proliferation of location-based services and applications has brought significant attention to data and location privacy. 
While general secure computation and privacy-enhancing techniques can partially address this problem, one outstanding challenge is to provide near latency-free search and compatibility with mainstream geographic search techniques, especially the Discrete Global Grid Systems (DGGS). This paper proposes a new construction, namely GridSE, for efficient and DGGS-compatible Secure Geographic Search (SGS) with both backward and forward privacy. We first formulate the notion of a semantic-secure primitive called \textit{symmetric prefix predicate encryption} (SP$^2$E), for predicting whether or not a keyword contains a given prefix, and provide a construction. Then we extend SP$^2$E for dynamic \textit{prefix symmetric searchable encryption} (pSSE), namely GridSE, which supports both backward and forward privacy. GridSE only uses lightweight primitives including cryptographic hash and XOR operations and is extremely efficient. Furthermore, we provide a generic pSSE framework that enables prefix search for traditional dynamic SSE that supports only full keyword search. Experimental results over real-world geographic databases of sizes (by the number of entries) from $10^3$ to $10^7$ and mainstream DGGS techniques show that GridSE achieves a speedup of $150\times$ - $5000\times$ on search latency and a saving of $99\%$ on communication overhead as compared to the state-of-the-art. Interestingly, even compared to plaintext search, GridSE introduces only $1.4\times$ extra computational cost and $0.9\times$ additional communication cost. Source code of our scheme is available at https://github.com/rykieguo1771/GridSE-RAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07916v1-abstract-full').style.display = 'none'; document.getElementById('2408.07916v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
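<p class="is-size-7 mathjax">The prefix-search functionality targeted above can be pictured with a toy index: each keyword (e.g., a DGGS cell identifier) is stored under a keyed hash of every one of its prefixes, so a prefix query becomes a single lookup. The sketch below only illustrates this prefix-predicate idea with hypothetical data and a throwaway key; it is not the SP$^2$E/GridSE construction and offers none of its forward or backward privacy guarantees.</p> <pre><code class="language-python">
# Toy prefix-token index: illustrates the functionality of prefix search over
# keyed tokens (a keyword matches a query iff it contains the queried prefix).
# This is NOT the SP^2E/GridSE scheme and provides none of its privacy
# guarantees; the key handling here is purely illustrative.
import hmac, hashlib
from collections import defaultdict

KEY = b"demo-key-not-secret"  # hypothetical key for the sketch

def token(prefix: str) -> bytes:
    return hmac.new(KEY, prefix.encode(), hashlib.sha256).digest()

def build_index(keywords):
    index = defaultdict(set)
    for kw in keywords:                      # e.g., DGGS cell identifiers
        for i in range(1, len(kw) + 1):
            index[token(kw[:i])].add(kw)     # index the keyword under every prefix
    return index

def prefix_search(index, prefix: str):
    return index.get(token(prefix), set())

cells = ["8928308280fffff", "8928308283bffff", "8a2a1072b59ffff"]  # H3-style ids
idx = build_index(cells)
print(prefix_search(idx, "892830828"))   # the two cells sharing that prefix
</code></pre>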
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00933">arXiv:2408.00933</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00933">pdf</a>, <a href="https://arxiv.org/format/2408.00933">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Functional Analysis">math.FA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> On the Structure of Bad Science Matrices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Albors%2C+A">Alex Albors</a>, <a href="/search/cs?searchtype=author&amp;query=Bhatti%2C+H">Hisham Bhatti</a>, <a href="/search/cs?searchtype=author&amp;query=Ganjoo%2C+L">Lukshya Ganjoo</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Raymond Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Kunisky%2C+D">Dmitriy Kunisky</a>, <a href="/search/cs?searchtype=author&amp;query=Mukherjee%2C+R">Rohan Mukherjee</a>, <a href="/search/cs?searchtype=author&amp;query=Stepin%2C+A">Alicia Stepin</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+T">Tony Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00933v2-abstract-short" style="display: inline;"> The bad science matrix problem consists in finding, among all matrices $A \in \mathbb{R}^{n \times n}$ with rows having unit $\ell^2$ norm, one that maximizes $尾(A) = \frac{1}{2^n} \sum_{x \in \{-1, 1\}^n} \|Ax\|_\infty$. Our main contribution is an explicit construction of an $n \times n$ matrix $A$ showing that $尾(A) \geq \sqrt{\log_2(n+1)}$, which is only 18% smaller than the asymptotic rate. W&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00933v2-abstract-full').style.display = 'inline'; document.getElementById('2408.00933v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00933v2-abstract-full" style="display: none;"> The bad science matrix problem consists in finding, among all matrices $A \in \mathbb{R}^{n \times n}$ with rows having unit $\ell^2$ norm, one that maximizes $尾(A) = \frac{1}{2^n} \sum_{x \in \{-1, 1\}^n} \|Ax\|_\infty$. Our main contribution is an explicit construction of an $n \times n$ matrix $A$ showing that $尾(A) \geq \sqrt{\log_2(n+1)}$, which is only 18% smaller than the asymptotic rate. We prove that every entry of any optimal matrix is a square root of a rational number, and we find provably optimal matrices for $n \leq 4$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00933v2-abstract-full').style.display = 'none'; document.getElementById('2408.00933v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 2 figures. Closest to version to be published in Involve, a Journal of Mathematics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21721">arXiv:2407.21721</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.21721">pdf</a>, <a href="https://arxiv.org/format/2407.21721">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Open-Vocabulary Audio-Visual Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruohao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+L">Liao Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+D">Dantong Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+Y">Yanyu Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Yue%2C+W">Wenzhen Yue</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+J">Ji Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+B">Bowei Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Ying%2C+X">Xianghua Ying</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21721v1-abstract-short" style="display: inline;"> Audio-visual semantic segmentation (AVSS) aims to segment and classify sounding objects in videos with acoustic cues. However, most approaches operate on the close-set assumption and only identify pre-defined categories from training data, lacking the generalization ability to detect novel categories in practical applications. In this paper, we introduce a new task: open-vocabulary audio-visual se&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21721v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21721v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21721v1-abstract-full" style="display: none;"> Audio-visual semantic segmentation (AVSS) aims to segment and classify sounding objects in videos with acoustic cues. However, most approaches operate on the close-set assumption and only identify pre-defined categories from training data, lacking the generalization ability to detect novel categories in practical applications. In this paper, we introduce a new task: open-vocabulary audio-visual semantic segmentation, extending AVSS task to open-world scenarios beyond the annotated label space. This is a more challenging task that requires recognizing all categories, even those that have never been seen nor heard during training. 
Moreover, we propose the first open-vocabulary AVSS framework, OV-AVSS, which mainly consists of two parts: 1) a universal sound source localization module to perform audio-visual fusion and locate all potential sounding objects and 2) an open-vocabulary classification module to predict categories with the help of the prior knowledge from large-scale pre-trained vision-language models. To properly evaluate the open-vocabulary AVSS, we split zero-shot training and testing subsets based on the AVSBench-semantic benchmark, namely AVSBench-OV. Extensive experiments demonstrate the strong segmentation and zero-shot generalization ability of our model on all categories. On the AVSBench-OV dataset, OV-AVSS achieves 55.43% mIoU on base categories and 29.14% mIoU on novel categories, exceeding the state-of-the-art zero-shot method by 41.88%/20.61% and open-vocabulary method by 10.2%/11.6%. The code is available at https://github.com/ruohaoguo/ovavss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21721v1-abstract-full').style.display = 'none'; document.getElementById('2407.21721v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM MM 2024 (Oral)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15568">arXiv:2407.15568</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.15568">pdf</a>, <a href="https://arxiv.org/format/2407.15568">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Empowering Agile-Based Generative Software Development through Human-AI Teamwork </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Sai Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+Z">Zhenchang Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ronghui Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+F">Fangzhou Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhaoyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaowang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Z">Zhiyong Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+Z">Zhiqiang Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15568v2-abstract-short" style="display: inline;"> In software development, the raw requirements proposed by users are frequently incomplete, which impedes the complete implementation of application functionalities. 
With the emergence of large language models, recent methods with the top-down waterfall model employ a questioning approach for requirement completion, attempting to explore further user requirements. However, users, constrained by the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15568v2-abstract-full').style.display = 'inline'; document.getElementById('2407.15568v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15568v2-abstract-full" style="display: none;"> In software development, the raw requirements proposed by users are frequently incomplete, which impedes the complete implementation of application functionalities. With the emergence of large language models, recent methods with the top-down waterfall model employ a questioning approach for requirement completion, attempting to explore further user requirements. However, users, constrained by their domain knowledge, lack effective acceptance criteria, which fail to capture the implicit needs of the user. Moreover, the cumulative errors of the waterfall model can lead to discrepancies between the generated code and user requirements. The Agile methodologies reduce cumulative errors through lightweight iteration and collaboration with users, but the challenge lies in ensuring semantic consistency between user requirements and the code generated. We propose AgileGen, an agile-based generative software development through human-AI teamwork. AgileGen attempts for the first time to use testable requirements by Gherkin for semantic consistency between requirements and code. Additionally, we innovate in human-AI teamwork, allowing users to participate in decision-making processes they do well and enhancing the completeness of application functionality. Finally, to improve the reliability of user scenarios, a memory pool mechanism is used to collect user decision-making scenarios and recommend them to new users. AgileGen, as a user-friendly interactive system, significantly outperformed existing best methods by 16.4% and garnered higher user satisfaction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15568v2-abstract-full').style.display = 'none'; document.getElementById('2407.15568v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted by ACM TOSEM</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> K.6.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03043">arXiv:2407.03043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03043">pdf</a>, <a href="https://arxiv.org/format/2407.03043">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SlerpFace: Face Template Protection via Spherical Linear Interpolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Z">Zhizhou Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Mi%2C+Y">Yuxi Mi</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuge Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jianqing Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+G">Guodong Mu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+S">Shouhong Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingyun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Rizen Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yunsheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+S">Shuigeng Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03043v2-abstract-short" style="display: inline;"> Contemporary face recognition systems use feature templates extracted from face images to identify persons. To enhance privacy, face template protection techniques are widely employed to conceal sensitive identity and appearance information stored in the template. This paper identifies an emerging privacy attack form utilizing diffusion models that could nullify prior protection. The attack can sy&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03043v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03043v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03043v2-abstract-full" style="display: none;"> Contemporary face recognition systems use feature templates extracted from face images to identify persons. To enhance privacy, face template protection techniques are widely employed to conceal sensitive identity and appearance information stored in the template. This paper identifies an emerging privacy attack form utilizing diffusion models that could nullify prior protection. The attack can synthesize high-quality, identity-preserving face images from templates, revealing persons&#39; appearance. Based on studies of the diffusion model&#39;s generative capability, this paper proposes a defense by rotating templates to a noise-like distribution. This is achieved efficiently by spherically and linearly interpolating templates on their located hypersphere. 
This paper further proposes to group-wisely divide and drop out templates&#39; feature dimensions, to enhance the irreversibility of rotated templates. The proposed techniques are concretized as a novel face template protection technique, SlerpFace. Extensive experiments show that SlerpFace provides satisfactory recognition accuracy and comprehensive protection against inversion and other attack forms, superior to prior arts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03043v2-abstract-full').style.display = 'none'; document.getElementById('2407.03043v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01649">arXiv:2407.01649</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.01649">pdf</a>, <a href="https://arxiv.org/format/2407.01649">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FAFE: Immune Complex Modeling with Geodesic Distance Loss on Noisy Group Frames </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+R">Ruidong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruihan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+S">Shitong Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yue Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jianzhu Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qiang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Y">Yunan Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+J">Jian Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01649v1-abstract-short" style="display: inline;"> Despite the striking success of general protein folding models such as AlphaFold2(AF2, Jumper et al. (2021)), the accurate computational modeling of antibody-antigen complexes remains a challenging task. 
In this paper, we first analyze AF2&#39;s primary loss function, known as the Frame Aligned Point Error (FAPE), and raise a previously overlooked issue that FAPE tends to face a gradient vanishing probl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01649v1-abstract-full').style.display = 'inline'; document.getElementById('2407.01649v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01649v1-abstract-full" style="display: none;"> Despite the striking success of general protein folding models such as AlphaFold2(AF2, Jumper et al. (2021)), the accurate computational modeling of antibody-antigen complexes remains a challenging task. In this paper, we first analyze AF2&#39;s primary loss function, known as the Frame Aligned Point Error (FAPE), and raise a previously overlooked issue that FAPE tends to face a gradient vanishing problem on high-rotational-error targets. To address this fundamental limitation, we propose a novel geodesic loss called Frame Aligned Frame Error (FAFE, denoted as F2E to distinguish from FAPE), which enables the model to better optimize both the rotational and translational errors between two frames. We then prove that F2E can be reformulated as a group-aware geodesic loss, which translates the optimization of the residue-to-residue error to optimizing group-to-group geodesic frame distance. By fine-tuning AF2 with our proposed new loss function, we attain a correct rate of 52.3\% (DockQ $&gt;$ 0.23) on an evaluation set and 43.8\% correct rate on a subset with low homology, with substantial improvement over AF2 by 182\% and 100\% respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01649v1-abstract-full').style.display = 'none'; document.getElementById('2407.01649v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
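<p class="is-size-7">For intuition about geodesic losses on rigid frames, the sketch below combines the standard geodesic (angular) distance on SO(3) with a Euclidean translation term. The weights and test frames are hypothetical; this is a generic frame-to-frame error for illustration, not the FAPE or FAFE/F2E loss defined in the paper.</p> <pre><code class="language-python">
# One standard geodesic (angular) distance on SO(3), combined with a Euclidean
# translation term, to illustrate what a frame-to-frame error can look like.
# Hypothetical sketch: the FAFE/F2E loss is a specific frame-aligned,
# group-aware formulation that is not reproduced here.
import numpy as np

def frame_error(R1, t1, R2, t2, w_rot=1.0, w_trans=1.0):
    # Geodesic rotation error: angle of the relative rotation R1^T R2.
    cos_theta = (np.trace(R1.T @ R2) - 1.0) / 2.0
    theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))   # in [0, pi]
    return w_rot * theta + w_trans * np.linalg.norm(t1 - t2)

def rot_z(a):  # simple test rotation about the z-axis
    c, s = np.cos(a), np.sin(a)
    return np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])

R1, t1 = np.eye(3), np.zeros(3)
R2, t2 = rot_z(np.pi / 2), np.array([1.0, 0.0, 0.0])
print(frame_error(R1, t1, R2, t2))  # pi/2 + 1.0, about 2.57
</code></pre>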
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16279">arXiv:2406.16279</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.16279">pdf</a>, <a href="https://arxiv.org/format/2406.16279">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SegNet4D: Efficient Instance-Aware 4D Semantic Segmentation for LiDAR Point Cloud </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+N">Neng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruibin Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+C">Chenghao Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Ziyue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Huimin Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zhiqiang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xieyuanli Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16279v3-abstract-short" style="display: inline;"> 4D LiDAR semantic segmentation, also referred to as multi-scan semantic segmentation, plays a crucial role in enhancing the environmental understanding capabilities of autonomous vehicles or robots. It classifies the semantic category of each LiDAR measurement point and detects whether it is dynamic, a critical ability for tasks like obstacle avoidance and autonomous navigation. Existing approache&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16279v3-abstract-full').style.display = 'inline'; document.getElementById('2406.16279v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16279v3-abstract-full" style="display: none;"> 4D LiDAR semantic segmentation, also referred to as multi-scan semantic segmentation, plays a crucial role in enhancing the environmental understanding capabilities of autonomous vehicles or robots. It classifies the semantic category of each LiDAR measurement point and detects whether it is dynamic, a critical ability for tasks like obstacle avoidance and autonomous navigation. Existing approaches often rely on computationally heavy 4D convolutions or recursive networks, which result in poor real-time performance, making them unsuitable for online robotics and autonomous driving applications. In this paper, we introduce SegNet4D, a novel real-time 4D semantic segmentation network offering both efficiency and strong semantic understanding. SegNet4D addresses 4D segmentation as two tasks: single-scan semantic segmentation and moving object segmentation, each tackled by a separate network head. Both results are combined in a motion-semantic fusion module to achieve comprehensive 4D segmentation. Additionally, instance information is extracted from the current scan and exploited for instance-wise segmentation consistency. 
Our approach surpasses state-of-the-art in both multi-scan semantic segmentation and moving object segmentation while offering greater efficiency, enabling real-time operation. Besides, its effectiveness and efficiency have also been validated on a real-world unmanned ground platform. Our code will be released at https://github.com/nubot-nudt/SegNet4D. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16279v3-abstract-full').style.display = 'none'; document.getElementById('2406.16279v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16253">arXiv:2406.16253</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.16253">pdf</a>, <a href="https://arxiv.org/format/2406.16253">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLMs Assist NLP Researchers: Critique Paper (Meta-)Reviewing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Du%2C+J">Jiangshu Du</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Wenting Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+Z">Zhongfen Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shuaiqi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lou%2C+R">Renze Lou</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+H+P">Henry Peng Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Venkit%2C+P+N">Pranav Narayanan Venkit</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+N">Nan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Srinath%2C+M">Mukund Srinath</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H+R">Haoran Ranran Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+V">Vipul Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yinghui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+F">Fei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tianlin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+P">Pengzhi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+C">Congying Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+C">Chen Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiayang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhaowei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Y">Ying Su</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+R+S">Raj Sanjay Shah</a>, <a 
href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruohao Guo</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16253v3-abstract-short" style="display: inline;"> This work is motivated by two key trends. On one hand, large language models (LLMs) have shown remarkable versatility in various generative tasks such as writing, drawing, and question answering, significantly reducing the time required for many routine tasks. On the other hand, researchers, whose work is not only time-consuming but also highly expertise-demanding, face increasing challenges as th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16253v3-abstract-full').style.display = 'inline'; document.getElementById('2406.16253v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16253v3-abstract-full" style="display: none;"> This work is motivated by two key trends. On one hand, large language models (LLMs) have shown remarkable versatility in various generative tasks such as writing, drawing, and question answering, significantly reducing the time required for many routine tasks. On the other hand, researchers, whose work is not only time-consuming but also highly expertise-demanding, face increasing challenges as they have to spend more time reading, writing, and reviewing papers. This raises the question: how can LLMs potentially assist researchers in alleviating their heavy workload? This study focuses on the topic of LLMs assist NLP Researchers, particularly examining the effectiveness of LLM in assisting paper (meta-)reviewing and its recognizability. To address this, we constructed the ReviewCritique dataset, which includes two types of information: (i) NLP papers (initial submissions rather than camera-ready) with both human-written and LLM-generated reviews, and (ii) each review comes with &#34;deficiency&#34; labels and corresponding explanations for individual segments, annotated by experts. Using ReviewCritique, this study explores two threads of research questions: (i) &#34;LLMs as Reviewers&#34;, how do reviews generated by LLMs compare with those written by humans in terms of quality and distinguishability? (ii) &#34;LLMs as Metareviewers&#34;, how effectively can LLMs identify potential issues, such as Deficient or unprofessional review segments, within individual paper reviews? To our knowledge, this is the first work to provide such a comprehensive analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16253v3-abstract-full').style.display = 'none'; document.getElementById('2406.16253v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00735">arXiv:2406.00735</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00735">pdf</a>, <a href="https://arxiv.org/format/2406.00735">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Full-Atom Peptide Design based on Multi-modal Flow Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+C">Chaoran Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zuofan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruihan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+S">Shitong Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Z">Zhizhou Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+J">Jian Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jianzhu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00735v1-abstract-short" style="display: inline;"> Peptides, short chains of amino acid residues, play a vital role in numerous biological processes by interacting with other target molecules, offering substantial potential in drug discovery. In this work, we present PepFlow, the first multi-modal deep generative model grounded in the flow-matching framework for the design of full-atom peptides that target specific protein receptors. Drawing inspi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00735v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00735v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00735v1-abstract-full" style="display: none;"> Peptides, short chains of amino acid residues, play a vital role in numerous biological processes by interacting with other target molecules, offering substantial potential in drug discovery. In this work, we present PepFlow, the first multi-modal deep generative model grounded in the flow-matching framework for the design of full-atom peptides that target specific protein receptors. Drawing inspiration from the crucial roles of residue backbone orientations and side-chain dynamics in protein-peptide interactions, we characterize the peptide structure using rigid backbone frames within the $\mathrm{SE}(3)$ manifold and side-chain angles on high-dimensional tori. Furthermore, we represent discrete residue types in the peptide sequence as categorical distributions on the probability simplex. 
By learning the joint distributions of each modality using derived flows and vector fields on corresponding manifolds, our method excels in the fine-grained design of full-atom peptides. Harnessing the multi-modal paradigm, our approach adeptly tackles various tasks such as fix-backbone sequence design and side-chain packing through partial sampling. Through meticulously crafted experiments, we demonstrate that PepFlow exhibits superior performance in comprehensive benchmarks, highlighting its significant potential in computational peptide design and analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00735v1-abstract-full').style.display = 'none'; document.getElementById('2406.00735v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00041">arXiv:2406.00041</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00041">pdf</a>, <a href="https://arxiv.org/format/2406.00041">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> QUB-Cirdan at &#34;Discharge Me!&#34;: Zero shot discharge letter generation by open-source LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Rui Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Farnan%2C+G">Greg Farnan</a>, <a href="/search/cs?searchtype=author&amp;query=McLaughlin%2C+N">Niall McLaughlin</a>, <a href="/search/cs?searchtype=author&amp;query=Devereux%2C+B">Barry Devereux</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00041v2-abstract-short" style="display: inline;"> The BioNLP ACL&#39;24 Shared Task on Streamlining Discharge Documentation aims to reduce the administrative burden on clinicians by automating the creation of critical sections of patient discharge letters. This paper presents our approach using the Llama3 8B quantized model to generate the &#34;Brief Hospital Course&#34; and &#34;Discharge Instructions&#34; sections. We employ a zero-shot method combined with Retrie&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00041v2-abstract-full').style.display = 'inline'; document.getElementById('2406.00041v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00041v2-abstract-full" style="display: none;"> The BioNLP ACL&#39;24 Shared Task on Streamlining Discharge Documentation aims to reduce the administrative burden on clinicians by automating the creation of critical sections of patient discharge letters. This paper presents our approach using the Llama3 8B quantized model to generate the &#34;Brief Hospital Course&#34; and &#34;Discharge Instructions&#34; sections. 
arXiv:2406.00041 (https://arxiv.org/abs/2406.00041) [pdf, other]
Subjects: cs.CL (Computation and Language)
Title: QUB-Cirdan at "Discharge Me!": Zero shot discharge letter generation by open-source LLM
Authors: Rui Guo, Greg Farnan, Niall McLaughlin, Barry Devereux
Abstract: The BioNLP ACL'24 Shared Task on Streamlining Discharge Documentation aims to reduce the administrative burden on clinicians by automating the creation of critical sections of patient discharge letters. This paper presents our approach using the Llama3 8B quantized model to generate the "Brief Hospital Course" and "Discharge Instructions" sections. We employ a zero-shot method combined with Retrieval-Augmented Generation (RAG) to produce concise, contextually accurate summaries. Our contributions include the development of a curated template-based approach to ensure reliability and consistency, as well as the integration of RAG for word count prediction. We also describe several unsuccessful experiments to provide insights into our pathway for the competition. Our results demonstrate the effectiveness and efficiency of our approach, achieving high scores across multiple evaluation metrics.
Submitted: 27 June, 2024; v1 submitted 27 May, 2024; originally announced June 2024.
Comments: BioNLP 2024 workshop
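The QUB-Cirdan abstract above combines a fixed zero-shot template with retrieval (RAG), including retrieval-based word-count prediction. The sketch below is a hedged illustration of that general pattern only; the TF-IDF retriever, the template wording, the corpus variables, and the commented-out call_llm placeholder are all assumptions rather than the authors' pipeline.

# Illustrative zero-shot + RAG prompt assembly (not the QUB-Cirdan code).
# A TF-IDF retriever finds similar past cases; their section lengths give a
# word-count target, and the prompt is filled from a fixed template.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

corpus_notes = ["past admission note A ...", "past note B ...", "past note C ..."]     # placeholder corpus
corpus_sections = ["past brief hospital course A", "past course B", "past course C"]   # matching sections

vec = TfidfVectorizer().fit(corpus_notes)

def build_prompt(new_note, k=2):
    sims = cosine_similarity(vec.transform([new_note]), vec.transform(corpus_notes))[0]
    top = sims.argsort()[::-1][:k]
    target_words = int(sum(len(corpus_sections[i].split()) for i in top) / k)
    return (
        "You are a clinician. Write the 'Brief Hospital Course' section for the "
        f"note below in about {target_words} words, using only facts from the note.\n\n"
        f"Note:\n{new_note}\n"
    )

prompt = build_prompt("admission note for the current patient ...")
# response = call_llm(prompt)   # placeholder for a quantized Llama3 8B call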
arXiv:2405.20834 (https://arxiv.org/abs/2405.20834) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: Retrieval Meets Reasoning: Even High-school Textbook Knowledge Benefits Multimodal Reasoning
Authors: Cheng Tan, Jingxuan Wei, Linzhuang Sun, Zhangyang Gao, Siyuan Li, Bihui Yu, Ruifeng Guo, Stan Z. Li
Abstract: Large language models equipped with retrieval-augmented generation (RAG) represent a burgeoning field aimed at enhancing answering capabilities by leveraging external knowledge bases. Although the application of RAG with language-only models has been extensively explored, its adaptation into multimodal vision-language models remains nascent. Going beyond mere answer generation, the primary goal of multimodal RAG is to cultivate the models' ability to reason in response to relevant queries. To this end, we introduce a novel multimodal RAG framework named RMR (Retrieval Meets Reasoning). The RMR framework employs a bi-modal retrieval module to identify the most relevant question-answer pairs, which then serve as scaffolds for the multimodal reasoning process. This training-free approach not only encourages the model to engage deeply with the reasoning processes inherent in the retrieved content but also facilitates the generation of answers that are precise and richly interpretable. Surprisingly, utilizing solely the ScienceQA dataset, collected from elementary and high school science curricula, RMR significantly boosts the performance of various vision-language models across a spectrum of benchmark datasets, including A-OKVQA, MMBench, and SEED. These outcomes highlight the substantial potential of our multimodal retrieval and reasoning mechanism to improve the reasoning capabilities of vision-language models.
Submitted: 31 May, 2024; originally announced May 2024.
Comments: Under review
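The RMR abstract above hinges on a bi-modal retrieval step that pulls the most relevant question-answer pairs to scaffold multimodal reasoning. The sketch below shows one plausible, training-free version using cosine similarity over precomputed image and text embeddings; the random embedding arrays and the 0.5/0.5 fusion weights are assumptions, not the paper's retrieval module.

# Toy bi-modal retrieval of scaffold QA pairs (illustrative, not the RMR code).
import numpy as np

rng = np.random.default_rng(0)
n_bank, d = 1000, 512
bank_img = rng.normal(size=(n_bank, d))     # placeholder image embeddings of stored QA pairs
bank_txt = rng.normal(size=(n_bank, d))     # placeholder question embeddings
qa_pairs = [f"Q{i} ... A{i} ..." for i in range(n_bank)]

def l2norm(x):
    return x / np.linalg.norm(x, axis=-1, keepdims=True)

def retrieve(query_img, query_txt, k=3, w_img=0.5, w_txt=0.5):
    # Fused score = weighted sum of image-image and text-text cosine similarity.
    s_img = l2norm(bank_img) @ l2norm(query_img)
    s_txt = l2norm(bank_txt) @ l2norm(query_txt)
    score = w_img * s_img + w_txt * s_txt
    top = np.argsort(score)[::-1][:k]
    return [qa_pairs[i] for i in top]

scaffolds = retrieve(rng.normal(size=d), rng.normal(size=d))
prompt = "\n".join(scaffolds) + "\nNow answer the new question step by step: ..."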
arXiv:2405.18882 (https://arxiv.org/abs/2405.18882) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: DecomCAM: Advancing Beyond Saliency Maps through Decomposition and Integration
Authors: Yuguang Yang, Runtang Guo, Sheng Wu, Yimi Wang, Linlin Yang, Bo Fan, Jilong Zhong, Juan Zhang, Baochang Zhang
Abstract: Interpreting complex deep networks, notably pre-trained vision-language models (VLMs), is a formidable challenge. Current Class Activation Map (CAM) methods highlight regions revealing the model's decision-making basis but lack clear saliency maps and detailed interpretability. To bridge this gap, we propose DecomCAM, a novel decomposition-and-integration method that distills shared patterns from channel activation maps. Utilizing singular value decomposition, DecomCAM decomposes class-discriminative activation maps into orthogonal sub-saliency maps (OSSMs), which are then integrated based on their contribution to the target concept. Extensive experiments on six benchmarks reveal that DecomCAM not only excels in localization accuracy but also achieves an effective balance between interpretability and computational efficiency. Further analysis unveils that OSSMs correlate with discernible object components, facilitating a granular understanding of the model's reasoning. This positions DecomCAM as a potential tool for fine-grained interpretation of advanced deep learning models. The code is available at https://github.com/CapricornGuang/DecomCAM.
Submitted: 29 May, 2024; originally announced May 2024.
Comments: Accepted by Neurocomputing journal
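The DecomCAM abstract above describes decomposing class-discriminative activation maps with SVD into orthogonal sub-saliency maps (OSSMs) and re-integrating them by their contribution to the target concept. The sketch below is one plausible reading of that pipeline on placeholder tensors; the class-weighting, the choice of k, and the contribution formula are assumptions, not the released implementation.

# Illustrative SVD decomposition of channel activation maps into orthogonal
# sub-saliency maps (one possible reading of DecomCAM, not the official code).
import numpy as np

rng = np.random.default_rng(0)
C, H, W = 64, 14, 14
acts = rng.normal(size=(C, H, W))        # channel activation maps (placeholder)
cls_w = rng.normal(size=C)               # class-discriminative channel weights (placeholder)

M = cls_w[:, None] * acts.reshape(C, -1)         # weighted maps, shape (C, H*W)
U, S, Vt = np.linalg.svd(M, full_matrices=False)

k = 5
ossms = Vt[:k].reshape(k, H, W)                  # orthogonal sub-saliency maps
contrib = S[:k] * U[:, :k].sum(axis=0)           # assumed per-OSSM contribution weights
saliency = np.tensordot(contrib, ossms, axes=1)  # integrated saliency map, shape (H, W)
saliency = np.maximum(saliency, 0)
saliency /= saliency.max() + 1e-8                # normalize to [0, 1] for visualization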
arXiv:2405.16248 (https://arxiv.org/abs/2405.16248) [pdf]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); q-bio.QM (Quantitative Methods)
Title: Combining Radiomics and Machine Learning Approaches for Objective ASD Diagnosis: Verifying White Matter Associations with ASD
Authors: Junlin Song, Yuzhuo Chen, Yuan Yao, Zetong Chen, Renhao Guo, Lida Yang, Xinyi Sui, Qihang Wang, Xijiao Li, Aihua Cao, Wei Li
Abstract: Autism Spectrum Disorder is a condition characterized by atypical brain development leading to impairments in social skills, communication abilities, repetitive behaviors, and sensory processing. There have been many studies combining brain MRI images with machine learning algorithms to achieve objective diagnosis of autism, but the correlation between white matter and autism has not been fully utilized. To address this gap, we develop a computer-aided diagnostic model focusing on white matter regions in brain MRI by employing radiomics and machine learning methods. This study introduced a MultiUNet model for segmenting white matter, leveraging the UNet architecture and utilizing manually segmented MRI images as the training data. Subsequently, we extracted white matter features using the Pyradiomics toolkit and applied different machine learning models such as Support Vector Machine, Random Forest, Logistic Regression, and K-Nearest Neighbors to predict autism. The prediction sets all exceeded 80% accuracy. Additionally, we employed a Convolutional Neural Network to analyze segmented white matter images, achieving a prediction accuracy of 86.84%. Notably, Support Vector Machine demonstrated the highest prediction accuracy at 89.47%. These findings not only underscore the efficacy of the models but also establish a link between white matter abnormalities and autism. Our study contributes to a comprehensive evaluation of various diagnostic models for autism and introduces a computer-aided diagnostic algorithm for early and objective autism diagnosis based on MRI white matter regions.
Submitted: 25 May, 2024; originally announced May 2024.
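The ASD entry above extracts radiomics features from segmented white matter and compares several classical classifiers. The sketch below reproduces only the model-comparison step on a synthetic feature table; the cohort size, feature count, and cross-validation setup are assumptions, and the Pyradiomics extraction itself is omitted.

# Comparing classical classifiers on a synthetic radiomics-style feature matrix
# (illustrative only; real features would come from Pyradiomics on white-matter masks).
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(120, 100))          # 120 subjects x 100 radiomics features (placeholder)
y = rng.integers(0, 2, size=120)         # 0 = control, 1 = ASD (placeholder labels)

models = {
    "SVM": SVC(kernel="rbf"),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=0),
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "KNN": KNeighborsClassifier(n_neighbors=5),
}
for name, clf in models.items():
    pipe = make_pipeline(StandardScaler(), clf)
    acc = cross_val_score(pipe, X, y, cv=5, scoring="accuracy").mean()
    print(f"{name}: mean CV accuracy = {acc:.3f}")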
arXiv:2405.15346 (https://arxiv.org/abs/2405.15346) [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: BiSup: Bidirectional Quantization Error Suppression for Large Language Models
Authors: Minghui Zou, Ronghui Guo, Sai Zhang, Xiaowang Zhang, Zhiyong Feng
Abstract: As the size and context length of Large Language Models (LLMs) grow, weight-activation quantization has emerged as a crucial technique for efficient deployment of LLMs. Compared to weight-only quantization, weight-activation quantization presents greater challenges due to the presence of outliers in activations. Existing methods have made significant progress by exploring mixed-precision quantization and outlier suppression. However, these methods primarily focus on optimizing the results of a single matrix multiplication, neglecting the bidirectional propagation of quantization errors in LLMs. Specifically, errors accumulate vertically within the same token through layers, and diffuse horizontally across different tokens due to self-attention mechanisms. To address this issue, we introduce BiSup, a Bidirectional quantization error Suppression method. By constructing appropriate optimizable parameter spaces, BiSup uses a small amount of data for quantization-aware parameter-efficient fine-tuning to suppress the vertical accumulation of errors. In addition, BiSup employs a prompt mixed-precision quantization strategy, which preserves high precision for the key-value cache of system prompts, to mitigate the horizontal diffusion of errors. Extensive experiments on the Llama and Qwen families demonstrate that BiSup can improve performance over two state-of-the-art methods (the average WikiText2 perplexity decreases from 13.26 to 9.41 for Atom and from 14.33 to 7.85 for QuaRot under the W3A3-g128 configuration), further facilitating the practical applications of low-bit weight-activation quantization.
Submitted: 24 May, 2024; originally announced May 2024.
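The BiSup abstract above is motivated by how weight-activation quantization error accumulates through layers ("vertical") and spreads across tokens through attention ("horizontal"). The toy sketch below only illustrates the vertical effect: it fake-quantizes the weights and activations of a stack of linear layers at a low bit-width and measures how the output error grows with depth. It is a motivation demo under assumed per-tensor symmetric quantization, not BiSup's suppression method.

# Toy demonstration of quantization error accumulating through stacked layers
# (motivation for BiSup, not its method). Symmetric per-tensor fake quantization.
import numpy as np

rng = np.random.default_rng(0)

def fake_quant(x, bits=3):
    qmax = 2 ** (bits - 1) - 1
    scale = np.abs(x).max() / qmax + 1e-12
    return np.round(x / scale).clip(-qmax - 1, qmax) * scale

depth, dim = 8, 256
weights = [rng.normal(scale=dim ** -0.5, size=(dim, dim)) for _ in range(depth)]
x_fp = x_q = rng.normal(size=(1, dim))

for i, w in enumerate(weights, 1):
    x_fp = np.tanh(x_fp @ w)                              # full-precision path
    x_q = np.tanh(fake_quant(x_q) @ fake_quant(w))        # W3A3-style quantized path
    rel_err = np.linalg.norm(x_q - x_fp) / np.linalg.norm(x_fp)
    print(f"layer {i}: relative output error = {rel_err:.3f}")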
arXiv:2405.12387 (https://arxiv.org/abs/2405.12387) [pdf, other]
Subjects: cs.LG (Machine Learning)
Title: Conformal Counterfactual Inference under Hidden Confounding
Authors: Zonghao Chen, Ruocheng Guo, Jean-François Ton, Yang Liu
Abstract: Personalized decision making requires the knowledge of potential outcomes under different treatments, and confidence intervals about the potential outcomes further enrich this decision-making process and improve its reliability in high-stakes scenarios. Predicting potential outcomes along with their uncertainty in a counterfactual world poses a fundamental challenge in causal inference. Existing methods that construct confidence intervals for counterfactuals either rely on the assumption of strong ignorability, or need access to un-identifiable lower and upper bounds that characterize the difference between observational and interventional distributions. To overcome these limitations, we first propose a novel approach, wTCP-DR, based on transductive weighted conformal prediction, which provides confidence intervals for counterfactual outcomes with marginal coverage guarantees, even under hidden confounding. With less restrictive assumptions, our approach requires access to a fraction of interventional data (from randomized controlled trials) to account for the covariate shift from the observational distribution to the interventional distribution. Theoretical results explicitly demonstrate the conditions under which our algorithm is strictly advantageous over the naive method that only uses interventional data. After ensuring valid intervals on counterfactuals, it is straightforward to construct intervals for individual treatment effects (ITEs). We demonstrate our method across synthetic and real-world data, including recommendation systems, to verify the superiority of our methods compared against state-of-the-art baselines in terms of both coverage and efficiency.
Submitted: 20 May, 2024; originally announced May 2024.
Comments: Published in SIGKDD'24
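The conformal-counterfactual entry above builds on weighted conformal prediction, where calibration residuals are reweighted to account for the shift between the observational and interventional distributions. The sketch below implements a generic weighted split-conformal interval via a weighted quantile; the density-ratio weights are taken as given placeholders, and this is not wTCP-DR itself.

# Generic weighted split conformal prediction interval (illustrative sketch,
# not wTCP-DR). Calibration residuals are reweighted by density-ratio weights.
import numpy as np

def weighted_conformal_interval(pred_test, cal_residuals, cal_weights, w_test, alpha=0.1):
    # Append an infinite residual for the test point, then take the weighted
    # (1 - alpha) quantile of residuals under the normalized weights.
    r = np.concatenate([cal_residuals, [np.inf]])
    w = np.concatenate([cal_weights, [w_test]])
    p = w / w.sum()
    order = np.argsort(r)
    cum = np.cumsum(p[order])
    q = r[order][np.searchsorted(cum, 1 - alpha)]
    return pred_test - q, pred_test + q

rng = np.random.default_rng(0)
cal_residuals = np.abs(rng.normal(size=500))        # |y - prediction| on calibration data
cal_weights = rng.uniform(0.5, 2.0, size=500)       # placeholder density-ratio weights
lo, hi = weighted_conformal_interval(pred_test=3.2, cal_residuals=cal_residuals,
                                     cal_weights=cal_weights, w_test=1.0)
print(f"90% interval: [{lo:.2f}, {hi:.2f}]")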
href="/search/cs?searchtype=author&amp;query=Zhu%2C+L">Liuhong Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jianjun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+C">Congbo Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">He Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+X">Xiaobo Qu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10570v3-abstract-short" style="display: inline;"> In cardiac Magnetic Resonance Imaging (MRI) analysis, simultaneous myocardial segmentation and T2 quantification are crucial for assessing myocardial pathologies. Existing methods often address these tasks separately, limiting their synergistic potential. To address this, we propose SQNet, a dual-task network integrating Transformer and Convolutional Neural Network (CNN) components. SQNet features&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10570v3-abstract-full').style.display = 'inline'; document.getElementById('2405.10570v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10570v3-abstract-full" style="display: none;"> In cardiac Magnetic Resonance Imaging (MRI) analysis, simultaneous myocardial segmentation and T2 quantification are crucial for assessing myocardial pathologies. Existing methods often address these tasks separately, limiting their synergistic potential. To address this, we propose SQNet, a dual-task network integrating Transformer and Convolutional Neural Network (CNN) components. SQNet features a T2-refine fusion decoder for quantitative analysis, leveraging global features from the Transformer, and a segmentation decoder with multiple local region supervision for enhanced accuracy. A tight coupling module aligns and fuses CNN and Transformer branch features, enabling SQNet to focus on myocardium regions. Evaluation on healthy controls (HC) and acute myocardial infarction patients (AMI) demonstrates superior segmentation dice scores (89.3/89.2) compared to state-of-the-art methods (87.7/87.9). T2 quantification yields strong linear correlations (Pearson coefficients: 0.84/0.93) with label values for HC/AMI, indicating accurate mapping. Radiologist evaluations confirm SQNet&#39;s superior image quality scores (4.60/4.58 for segmentation, 4.32/4.42 for T2 quantification) over state-of-the-art methods (4.50/4.44 for segmentation, 3.59/4.37 for T2 quantification). SQNet thus offers accurate simultaneous segmentation and quantification, enhancing cardiac disease diagnosis, such as AMI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10570v3-abstract-full').style.display = 'none'; document.getElementById('2405.10570v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.02941">arXiv:2405.02941</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.02941">pdf</a>, <a href="https://arxiv.org/format/2405.02941">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Boundary-aware Decoupled Flow Networks for Realistic Extreme Rescaling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinmin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+T">Tao Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingyun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Kang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shaoming Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+S">Shu-Tao Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Rizen Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.02941v2-abstract-short" style="display: inline;"> Recently developed generative methods, including invertible rescaling network (IRN) based and generative adversarial network (GAN) based methods, have demonstrated exceptional performance in image rescaling. However, IRN-based methods tend to produce over-smoothed results, while GAN-based methods easily generate fake details, which thus hinders their real applications. To address this issue, we pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.02941v2-abstract-full').style.display = 'inline'; document.getElementById('2405.02941v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.02941v2-abstract-full" style="display: none;"> Recently developed generative methods, including invertible rescaling network (IRN) based and generative adversarial network (GAN) based methods, have demonstrated exceptional performance in image rescaling. However, IRN-based methods tend to produce over-smoothed results, while GAN-based methods easily generate fake details, which thus hinders their real applications. To address this issue, we propose Boundary-aware Decoupled Flow Networks (BDFlow) to generate realistic and visually pleasing results. Unlike previous methods that model high-frequency information as standard Gaussian distribution directly, our BDFlow first decouples the high-frequency information into \textit{semantic high-frequency} that adheres to a Boundary distribution and \textit{non-semantic high-frequency} counterpart that adheres to a Gaussian distribution. 
arXiv:2405.02941 (https://arxiv.org/abs/2405.02941) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: Boundary-aware Decoupled Flow Networks for Realistic Extreme Rescaling
Authors: Jinmin Li, Tao Dai, Jingyun Zhang, Kang Liu, Jun Wang, Shaoming Wang, Shu-Tao Xia, Rizen Guo
Abstract: Recently developed generative methods, including invertible rescaling network (IRN) based and generative adversarial network (GAN) based methods, have demonstrated exceptional performance in image rescaling. However, IRN-based methods tend to produce over-smoothed results, while GAN-based methods easily generate fake details, which hinders their real-world applications. To address this issue, we propose Boundary-aware Decoupled Flow Networks (BDFlow) to generate realistic and visually pleasing results. Unlike previous methods that model high-frequency information as a standard Gaussian distribution directly, our BDFlow first decouples the high-frequency information into semantic high-frequency content that adheres to a Boundary distribution and a non-semantic high-frequency counterpart that adheres to a Gaussian distribution. Specifically, to capture semantic high-frequency parts accurately, we use a Boundary-aware Mask (BAM) to constrain the model to produce rich textures, while the non-semantic high-frequency part is randomly sampled from a Gaussian distribution. Comprehensive experiments demonstrate that our BDFlow significantly outperforms other state-of-the-art methods while maintaining lower complexity. Notably, our BDFlow improves the PSNR by 4.4 dB and the SSIM by 0.1 on average over GRAIN, utilizing only 74% of the parameters and 20% of the computation. The code will be available at https://github.com/THU-Kingmin/BAFlow.
Submitted: 12 May, 2024; v1 submitted 5 May, 2024; originally announced May 2024.
arXiv:2404.18948 (https://arxiv.org/abs/2404.18948) [pdf, other]
Subjects: cs.LG (Machine Learning)
Title: Sub-Adjacent Transformer: Improving Time Series Anomaly Detection with Reconstruction Error from Sub-Adjacent Neighborhoods
Authors: Wenzhen Yue, Xianghua Ying, Ruohao Guo, DongDong Chen, Ji Shi, Bowei Xing, Yuqing Zhu, Taiyan Chen
Abstract: In this paper, we present the Sub-Adjacent Transformer with a novel attention mechanism for unsupervised time series anomaly detection. Unlike previous approaches that rely on all the points within some neighborhood for time point reconstruction, our method restricts the attention to regions not immediately adjacent to the target points, termed sub-adjacent neighborhoods. Our key observation is that owing to the rarity of anomalies, they typically exhibit more pronounced differences from their sub-adjacent neighborhoods than from their immediate vicinities. By focusing the attention on the sub-adjacent areas, we make the reconstruction of anomalies more challenging, thereby enhancing their detectability. Technically, our approach concentrates attention on the non-diagonal areas of the attention matrix by enlarging the corresponding elements in the training stage. To facilitate the implementation of the desired attention matrix pattern, we adopt linear attention because of its flexibility and adaptability. Moreover, a learnable mapping function is proposed to improve the performance of linear attention. Empirically, the Sub-Adjacent Transformer achieves state-of-the-art performance across six real-world anomaly detection benchmarks, covering diverse fields such as server monitoring, space exploration, and water treatment.
Submitted: 27 April, 2024; originally announced April 2024.
Comments: IJCAI 2024
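The Sub-Adjacent Transformer entry above restricts attention to a band of positions that are near, but not immediately adjacent to, each time point, so anomalies reconstruct poorly. The sketch below builds such a sub-adjacent mask and applies it inside ordinary masked softmax attention; the band offsets and the use of softmax instead of the paper's linear attention with a learnable mapping are simplifying assumptions.

# Building a sub-adjacent attention mask for time-series reconstruction
# (illustrative; the paper trains linear attention toward this pattern).
import torch

def sub_adjacent_mask(seq_len, near=5, far=20):
    """Allow attention only to positions whose distance d satisfies near <= d <= far."""
    idx = torch.arange(seq_len)
    dist = (idx[None, :] - idx[:, None]).abs()
    return (dist >= near) & (dist <= far)

def masked_attention(q, k, v, mask):
    scores = q @ k.transpose(-2, -1) / q.shape[-1] ** 0.5
    scores = scores.masked_fill(~mask, float("-inf"))
    return torch.softmax(scores, dim=-1) @ v

T, d = 128, 32
x = torch.randn(T, d)                     # one window of a multivariate series (placeholder)
mask = sub_adjacent_mask(T)
recon = masked_attention(x, x, x, mask)   # reconstruct each point from sub-adjacent context only
anomaly_score = (recon - x).pow(2).mean(dim=-1)   # large reconstruction error flags anomalies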
arXiv:2404.14827 (https://arxiv.org/abs/2404.14827) [pdf, other]
Subjects: cs.CL (Computation and Language)
Title: Sentence-Level or Token-Level? A Comprehensive Study on Knowledge Distillation
Authors: Jingxuan Wei, Linzhuang Sun, Yichong Leng, Xu Tan, Bihui Yu, Ruifeng Guo
Abstract: Knowledge distillation, transferring knowledge from a teacher model to a student model, has emerged as a powerful technique in neural machine translation for compressing models or simplifying training targets. Knowledge distillation encompasses two primary methods: sentence-level distillation and token-level distillation. In sentence-level distillation, the student model is trained to align with the output of the teacher model, which can alleviate the training difficulty and give the student model a comprehensive understanding of global structure. Differently, token-level distillation requires the student model to learn the output distribution of the teacher model, facilitating a more fine-grained transfer of knowledge. Studies have revealed divergent performances between sentence-level and token-level distillation across different scenarios, leading to confusion about the empirical selection of knowledge distillation methods. In this study, we argue that token-level distillation, with its more complex objective (i.e., distribution), is better suited for "simple" scenarios, while sentence-level distillation excels in "complex" scenarios. To substantiate our hypothesis, we systematically analyze the performance of distillation methods by varying the model size of student models, the complexity of text, and the difficulty of the decoding procedure. While our experimental results validate our hypothesis, defining the complexity level of a given scenario remains a challenging task. So we further introduce a novel hybrid method that combines token-level and sentence-level distillation through a gating mechanism, aiming to leverage the advantages of both individual methods. Experiments demonstrate that the hybrid method surpasses the performance of token-level and sentence-level distillation methods, as well as previous works, by a clear margin, demonstrating the effectiveness of the proposed hybrid method.
Submitted: 23 April, 2024; originally announced April 2024.
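The knowledge-distillation study above contrasts sentence-level distillation (train on the teacher's decoded outputs) with token-level distillation (match the teacher's per-token distribution) and combines them through a gate. The sketch below shows a standard token-level KL loss and a simple scalar gate mixing it with a sentence-level cross-entropy term; the gating form and temperature are assumptions, not the paper's exact mechanism.

# Token-level KD loss plus a simple gated mix with a sentence-level term
# (illustrative; the paper's gating mechanism may differ).
import torch
import torch.nn.functional as F

B, T, V = 4, 16, 1000
student_logits = torch.randn(B, T, V, requires_grad=True)
teacher_logits = torch.randn(B, T, V)
teacher_tokens = teacher_logits.argmax(-1)        # teacher's decoded output (sentence-level target)
temperature = 2.0

# Token-level: KL between teacher and student distributions at every position.
kd_token = F.kl_div(
    F.log_softmax(student_logits / temperature, dim=-1),
    F.softmax(teacher_logits / temperature, dim=-1),
    reduction="batchmean",
) * temperature ** 2

# Sentence-level: cross-entropy against the teacher's generated tokens.
kd_sentence = F.cross_entropy(student_logits.reshape(-1, V), teacher_tokens.reshape(-1))

gate = torch.sigmoid(torch.tensor(0.0))           # stand-in for a learnable gate in [0, 1]
loss = gate * kd_token + (1 - gate) * kd_sentence
loss.backward()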
arXiv:2404.10343 (https://arxiv.org/abs/2404.10343) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
Title: The Ninth NTIRE 2024 Efficient Super-Resolution Challenge Report
Authors: Bin Ren, Yawei Li, Nancy Mehta, Radu Timofte, Hongyuan Yu, Cheng Wan, Yuxin Hong, Bingnan Han, Zhuoyuan Wu, Yajun Zou, Yuqing Liu, Jizhe Li, Keji He, Chao Fan, Heng Zhang, Xiaolin Zhang, Xuanwu Yin, Kunlong Zuo, Bohao Liao, Peizhe Xia, Long Peng, Zhibo Du, Xin Di, Wangkai Li, Yang Wang, et al. (109 additional authors not shown)
Abstract: This paper provides a comprehensive review of the NTIRE 2024 challenge, focusing on efficient single-image super-resolution (ESR) solutions and their outcomes. The task of this challenge is to super-resolve an input image with a magnification factor of x4 based on pairs of low and corresponding high-resolution images. The primary objective is to develop networks that optimize various aspects such as runtime, parameters, and FLOPs, while still maintaining a peak signal-to-noise ratio (PSNR) of approximately 26.90 dB on the DIV2K_LSDIR_valid dataset and 26.99 dB on the DIV2K_LSDIR_test dataset. In addition, this challenge has 4 tracks including the main track (overall performance), sub-track 1 (runtime), sub-track 2 (FLOPs), and sub-track 3 (parameters). In the main track, all three metrics (i.e., runtime, FLOPs, and parameter count) were considered. The ranking of the main track is calculated based on a weighted sum of the scores of all other sub-tracks. In sub-track 1, the practical runtime performance of the submissions was evaluated, and the corresponding score was used to determine the ranking. In sub-track 2, the number of FLOPs was considered, and the score calculated from it was used to determine the ranking. In sub-track 3, the number of parameters was considered, and the score calculated from it was used to determine the ranking. RLFN is set as the baseline for efficiency measurement. The challenge had 262 registered participants, and 34 teams made valid submissions. These entries gauge the state of the art in efficient single-image super-resolution. To facilitate the reproducibility of the challenge and enable other researchers to build upon these findings, the code and the pre-trained models of validated solutions are made publicly available at https://github.com/Amazingren/NTIRE2024_ESR/.
Submitted: 25 June, 2024; v1 submitted 16 April, 2024; originally announced April 2024.
Comments: The report paper of NTIRE2024 Efficient Super-resolution, accepted by CVPRW2024
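The NTIRE report above ranks the main track by a weighted combination of the sub-track scores (runtime, FLOPs, parameters) measured against the RLFN baseline. The exact formula and weights are not given in the abstract, so the sketch below is purely hypothetical: it normalizes each metric by assumed baseline numbers and combines the resulting scores with assumed weights.

# Hypothetical main-track scoring against an RLFN-style baseline
# (the real NTIRE 2024 ESR formula and weights are not stated in the abstract).
baseline = {"runtime_ms": 30.0, "flops_g": 20.0, "params_m": 0.5}   # placeholder baseline numbers
weights = {"runtime_ms": 0.5, "flops_g": 0.25, "params_m": 0.25}    # assumed sub-track weights

def main_track_score(submission):
    # Lower is better for every metric, so each sub-track score is baseline / submission.
    return sum(weights[k] * baseline[k] / submission[k] for k in baseline)

teams = {
    "team_a": {"runtime_ms": 18.0, "flops_g": 15.0, "params_m": 0.30},
    "team_b": {"runtime_ms": 25.0, "flops_g": 9.0, "params_m": 0.45},
}
for name, sub in sorted(teams.items(), key=lambda kv: -main_track_score(kv[1])):
    print(f"{name}: score = {main_track_score(sub):.3f}")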
arXiv:2404.01548 (https://arxiv.org/abs/2404.01548) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: mChartQA: A universal benchmark for multimodal Chart Question Answer based on Vision-Language Alignment and Reasoning
Authors: Jingxuan Wei, Nan Xu, Guiyong Chang, Yin Luo, BiHui Yu, Ruifeng Guo
Abstract: In the fields of computer vision and natural language processing, multimodal chart question-answering, especially involving color, structure, and textless charts, poses significant challenges. Traditional methods, which typically involve either direct multimodal processing or a table-to-text conversion followed by language model analysis, have limitations in effectively handling these complex scenarios. This paper introduces a novel multimodal chart question-answering model, specifically designed to address these intricate tasks. Our model integrates visual and linguistic processing, overcoming the constraints of existing methods. We adopt a dual-phase training approach: the initial phase focuses on aligning image and text representations, while the subsequent phase concentrates on optimizing the model's interpretative and analytical abilities in chart-related queries. This approach has demonstrated superior performance on multiple public datasets, particularly in handling color, structure, and textless chart questions, indicating its effectiveness in complex multimodal tasks.
Submitted: 1 April, 2024; originally announced April 2024.
href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
