Search | arXiv e-print repository
Showing 1–50 of 232 results for author: Zeng, H

Searching in archive cs (all archives: https://arxiv.org/search/?searchtype=author&query=Zeng%2C+H). Results are sorted by announcement date (newest first), 50 per page.

Search v0.5.6, released 2020-02-24 (https://github.com/arXiv/arxiv-search/releases)
**1. Hypencoder: Hypernetworks for Information Retrieval**
arXiv:2502.05364 (https://arxiv.org/abs/2502.05364) [pdf, other]
Subjects: cs.IR; cs.LG
Authors: Julian Killingback, Hansi Zeng, Hamed Zamani

Abstract: The vast majority of retrieval models depend on vector inner products to produce a relevance score between a query and a document. This naturally limits the expressiveness of the relevance score that can be employed. We propose a new paradigm: instead of producing a vector to represent the query, we produce a small neural network that acts as a learned relevance function. This small neural network takes in a representation of the document (in this paper, a single vector) and produces a scalar relevance score. To produce the small neural network, we use a hypernetwork, a network that produces the weights of other networks, as our query encoder; we call this a Hypencoder. Experiments on in-domain search tasks show that Hypencoder significantly outperforms strong dense retrieval models and achieves higher metrics than reranking models and models an order of magnitude larger. Hypencoder also generalizes well to out-of-domain search tasks. To assess the extent of Hypencoder's capabilities, we evaluate on a set of hard retrieval tasks, including tip-of-the-tongue retrieval and instruction-following retrieval, and find that the performance gap widens substantially compared to standard retrieval tasks. Furthermore, to demonstrate the practicality of our method, we implement an approximate search algorithm and show that our model is able to search 8.8M documents in under 60 ms.

Submitted 7 February, 2025; originally announced February 2025.
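The query-encoder-as-hypernetwork mechanic described in this abstract is easy to sketch. The following is a minimal, hypothetical illustration, not the authors' implementation; all dimensions, layer counts, and names are assumptions. A hypernetwork maps a query embedding to the weights of a one-hidden-layer MLP that scores document vectors:

```python
import torch
import torch.nn as nn

class TinyHypencoder(nn.Module):
    """Hypothetical sketch: a hypernetwork turns a query embedding into the
    weights of a one-hidden-layer MLP that scores document vectors."""

    def __init__(self, q_dim=768, d_dim=768, hidden=64):
        super().__init__()
        self.d_dim, self.hidden = d_dim, hidden
        n_weights = d_dim * hidden + hidden + hidden + 1   # W1, b1, w2, b2
        self.hyper = nn.Linear(q_dim, n_weights)  # stand-in for a real query encoder

    def forward(self, q_emb, doc_embs):
        # q_emb: (q_dim,); doc_embs: (n_docs, d_dim) -> (n_docs,) relevance scores
        w = self.hyper(q_emb)
        k = self.d_dim * self.hidden
        W1 = w[:k].view(self.hidden, self.d_dim)
        b1 = w[k:k + self.hidden]
        w2 = w[k + self.hidden:k + 2 * self.hidden]
        b2 = w[-1]
        h = torch.relu(doc_embs @ W1.T + b1)   # per-document hidden features
        return h @ w2 + b2                     # one scalar score per document

scores = TinyHypencoder()(torch.randn(768), torch.randn(100, 768))
print(scores.shape)  # torch.Size([100])
```

Because each query yields its own scoring network, relevance is no longer constrained to an inner product between fixed query and document vectors.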
**2. Multi-Class Segmentation of Aortic Branches and Zones in Computed Tomography Angiography: The AortaSeg24 Challenge**
arXiv:2502.05330 (https://arxiv.org/abs/2502.05330) [pdf, other]
Subjects: eess.IV; cs.AI; cs.CV; cs.LG
Authors: Muhammad Imran, Jonathan R. Krebs, Vishal Balaji Sivaraman, Teng Zhang, Amarjeet Kumar, Walker R. Ueland, Michael J. Fassler, Jinlong Huang, Xiao Sun, Lisheng Wang, Pengcheng Shi, Maximilian Rokuss, Michael Baumgartner, Yannick Kirchhof, Klaus H. Maier-Hein, Fabian Isensee, Shuolin Liu, Bing Han, Bong Thanh Nguyen, Dong-jin Shin, Park Ji-Woo, Mathew Choi, Kwang-Hyun Uhm, Sung-Jea Ko, Chanwoong Lee, et al. (38 additional authors not shown)

Abstract: Multi-class segmentation of the aorta in computed tomography angiography (CTA) scans is essential for diagnosing and planning complex endovascular treatments for patients with aortic dissections. However, existing methods reduce aortic segmentation to a binary problem, limiting their ability to measure diameters across different branches and zones. Furthermore, no open-source dataset is currently available to support the development of multi-class aortic segmentation methods. To address this gap, we organized the AortaSeg24 MICCAI Challenge, introducing the first dataset of 100 CTA volumes annotated for 23 clinically relevant aortic branches and zones. This dataset was designed to facilitate both model development and validation. The challenge attracted 121 teams worldwide, with participants leveraging state-of-the-art frameworks such as nnU-Net and exploring novel techniques, including cascaded models, data augmentation strategies, and custom loss functions. We evaluated the submitted algorithms using the Dice Similarity Coefficient (DSC) and Normalized Surface Distance (NSD), highlighting the approaches adopted by the top five performing teams. This paper presents the challenge design, dataset details, evaluation metrics, and an in-depth analysis of the top-performing algorithms. The annotated dataset, evaluation code, and implementations of the leading methods are publicly available to support further research. All resources can be accessed at https://aortaseg24.grand-challenge.org.

Submitted 7 February, 2025; originally announced February 2025.
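For reference, the Dice Similarity Coefficient used to rank submissions is a simple volume-overlap measure. A minimal per-class version for integer label maps might look like this (label conventions, including background as class 0, are assumptions):

```python
import numpy as np

def dice_per_class(pred, gt, num_classes):
    """Dice Similarity Coefficient per label for integer label maps.
    pred, gt: integer arrays of identical shape; class 0 is background."""
    scores = {}
    for c in range(1, num_classes):
        p, g = (pred == c), (gt == c)
        denom = p.sum() + g.sum()
        # Convention: an absent class predicted as absent counts as perfect.
        scores[c] = 1.0 if denom == 0 else 2.0 * np.logical_and(p, g).sum() / denom
    return scores

# Toy usage: 23 aortic branches/zones plus background = 24 labels.
pred = np.random.randint(0, 24, size=(8, 8, 8))
gt = np.random.randint(0, 24, size=(8, 8, 8))
print(dice_per_class(pred, gt, num_classes=24)[1])
```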
**3. Exploring Imbalanced Annotations for Effective In-Context Learning**
arXiv:2502.04037 (https://arxiv.org/abs/2502.04037) [pdf, other]
Subjects: cs.CL; cs.LG
Authors: Hongfu Gao, Feipeng Zhang, Hao Zeng, Deyu Meng, Bingyi Jing, Hongxin Wei

Abstract: Large language models (LLMs) have shown impressive performance on downstream tasks through in-context learning (ICL), which heavily relies on the demonstrations selected from annotated datasets. Existing selection methods may hinge on the distribution of annotated datasets, which can often be long-tailed in real-world scenarios. In this work, we show that imbalanced class distributions in annotated datasets significantly degrade the performance of ICL across various tasks and selection methods. Moreover, traditional rebalancing methods fail to ameliorate the issue of class imbalance in ICL. Our method is motivated by decomposing the distributional differences between annotated and test datasets into two-component weights: class-wise weights and conditional bias. The key idea behind our method is to estimate the conditional bias by minimizing the empirical error on a balanced validation dataset and to employ the two-component weights to modify the original scoring functions during selection. Our approach can prevent selecting too many demonstrations from a single class while preserving the effectiveness of the original selection methods. Extensive experiments demonstrate the effectiveness of our method, improving the average accuracy by up to 5.46 on common benchmarks with imbalanced datasets.

Submitted 6 February, 2025; originally announced February 2025.
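The paper's estimator is not reproduced here, but the general flavor of class-aware demonstration selection can be sketched: rescale each candidate's original selection score by a class-wise weight so that no single class dominates the chosen demonstrations. The weighting below is a placeholder for illustration, not the authors' method:

```python
from collections import Counter

def reweighted_select(candidates, scores, class_weights, k):
    """candidates: list of (example, label); scores: the original selection
    method's scores. Rank by score times a class-wise weight and take top-k,
    so high-weight (e.g., rare) classes are not crowded out."""
    ranked = sorted(zip(candidates, scores),
                    key=lambda cs: cs[1] * class_weights[cs[0][1]],
                    reverse=True)
    return [cand for cand, _ in ranked[:k]]

# Toy usage: three classes; class 2 is rare and up-weighted (made-up weights).
cands = [(f"ex{i}", i % 3) for i in range(30)]
scores = [1.0 - 0.01 * i for i in range(30)]
weights = {0: 0.5, 1: 1.0, 2: 2.0}
demos = reweighted_select(cands, scores, weights, k=8)
print(Counter(label for _, label in demos))  # class mix shifts toward class 2
```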
**4. All-in-One Image Compression and Restoration**
arXiv:2502.03649 (https://arxiv.org/abs/2502.03649) [pdf, other]
Subjects: cs.CV
Authors: Huimin Zeng, Jiacheng Li, Ziqiang Zheng, Zhiwei Xiong

Abstract: Visual images corrupted by various types and levels of degradation are commonly encountered in practical image compression. However, most existing image compression methods are tailored to clean images and therefore struggle to achieve satisfying results on degraded images. Joint compression and restoration methods typically focus on a single type of degradation and fail to address the variety of degradations seen in practice. To this end, we propose a unified framework for all-in-one image compression and restoration, which incorporates restoration capability against various degradations into the image compression process. The key challenges involve distinguishing authentic image content from degradations, and flexibly eliminating various degradations without prior knowledge. Specifically, the proposed framework approaches these challenges from two perspectives: content information aggregation and degradation representation aggregation. Extensive experiments demonstrate the following merits of our model: 1) superior rate-distortion (RD) performance on various degraded inputs while preserving the performance on clean data; 2) strong generalization to real-world and unseen scenarios; 3) higher computational efficiency than compared methods. Our code is available at https://github.com/ZeldaM1/All-in-one.

Submitted 5 February, 2025; originally announced February 2025.
Comments: Accepted to WACV 2025 (oral).

**5. Parametric Scaling Law of Tuning Bias in Conformal Prediction**
arXiv:2502.03023 (https://arxiv.org/abs/2502.03023) [pdf, other]
Subjects: cs.LG; math.ST; stat.ME
Authors: Hao Zeng, Kangdao Liu, Bingyi Jing, Hongxin Wei

Abstract: Conformal prediction is a popular framework for uncertainty quantification that constructs prediction sets with coverage guarantees. To uphold the exchangeability assumption, many conformal prediction methods necessitate an additional holdout set for parameter tuning. Yet the impact of violating this principle on coverage remains underexplored, making it ambiguous in practical applications. In this work, we empirically find that the tuning bias (the coverage gap introduced by leveraging the same dataset for tuning and calibration) is negligible for simple parameter tuning in many conformal prediction methods. In particular, we observe a scaling law for the tuning bias: the bias increases with parameter space complexity and decreases with calibration set size. Formally, we establish a theoretical framework to quantify the tuning bias and provide a rigorous proof of the scaling law by deriving its upper bound. Finally, we discuss how to reduce the tuning bias, guided by the theory we develop.

Submitted 5 February, 2025; originally announced February 2025.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03023v1-abstract-full').style.display = 'none'; document.getElementById('2502.03023v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01718">arXiv:2502.01718</a> <span> [<a href="https://arxiv.org/pdf/2502.01718">pdf</a>, <a href="https://arxiv.org/format/2502.01718">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ACECODER: Acing Coder RL via Automated Test-Case Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Huaye Zeng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Dongfu Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haozhe Wang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+P">Ping Nie</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaotong Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenhu Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01718v3-abstract-short" style="display: inline;"> Most progress in recent coder models has been driven by supervised fine-tuning (SFT), while the potential of reinforcement learning (RL) remains largely unexplored, primarily due to the lack of reliable reward data/model in the code domain. In this paper, we address this challenge by leveraging automated large-scale test-case synthesis to enhance code model training. Specifically, we design a pipe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01718v3-abstract-full').style.display = 'inline'; document.getElementById('2502.01718v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01718v3-abstract-full" style="display: none;"> Most progress in recent coder models has been driven by supervised fine-tuning (SFT), while the potential of reinforcement learning (RL) remains largely unexplored, primarily due to the lack of reliable reward data/model in the code domain. In this paper, we address this challenge by leveraging automated large-scale test-case synthesis to enhance code model training. Specifically, we design a pipeline that generates extensive (question, test-cases) pairs from existing code data. Using these test cases, we construct preference pairs based on pass rates over sampled programs to train reward models with Bradley-Terry loss. It shows an average of 10-point improvement for Llama-3.1-8B-Ins and 5-point improvement for Qwen2.5-Coder-7B-Ins through best-of-32 sampling, making the 7B model on par with 236B DeepSeek-V2.5. 
Furthermore, we conduct reinforcement learning with both reward models and test-case pass rewards, leading to consistent improvements across HumanEval, MBPP, BigCodeBench, and LiveCodeBench (V4). Notably, we follow the R1-style training to start from Qwen2.5-Coder-base directly and show that our RL training can improve model on HumanEval-plus by over 25\% and MBPP-plus by 6\% for merely 80 optimization steps. We believe our results highlight the huge potential of reinforcement learning in coder models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01718v3-abstract-full').style.display = 'none'; document.getElementById('2502.01718v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 figure, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18363">arXiv:2501.18363</a> <span> [<a href="https://arxiv.org/pdf/2501.18363">pdf</a>, <a href="https://arxiv.org/format/2501.18363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Robust Online Conformal Prediction under Uniform Label Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xi%2C+H">Huajun Xi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kangdao Liu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Hao Zeng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wenguang Sun</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+H">Hongxin Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18363v2-abstract-short" style="display: inline;"> Conformal prediction is an emerging technique for uncertainty quantification that constructs prediction sets guaranteed to contain the true label with a predefined probability. Recent work develops online conformal prediction methods that adaptively construct prediction sets to accommodate distribution shifts. However, existing algorithms typically assume perfect label accuracy which rarely holds… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18363v2-abstract-full').style.display = 'inline'; document.getElementById('2501.18363v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18363v2-abstract-full" style="display: none;"> Conformal prediction is an emerging technique for uncertainty quantification that constructs prediction sets guaranteed to contain the true label with a predefined probability. Recent work develops online conformal prediction methods that adaptively construct prediction sets to accommodate distribution shifts. 
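The Bradley-Terry loss mentioned above has a compact form: for a preferred program y+ and a rejected program y- for the same question, minimize -log sigma(r(y+) - r(y-)). A generic sketch follows (the reward values here are toy stand-ins for a reward model's outputs):

```python
import torch
import torch.nn.functional as F

def bradley_terry_loss(r_chosen, r_rejected):
    """Bradley-Terry pairwise loss: -log P(chosen > rejected),
    where P = sigmoid(r_chosen - r_rejected)."""
    return -F.logsigmoid(r_chosen - r_rejected).mean()

# Toy usage: rewards for program pairs whose test pass rates differ.
r_hi = torch.tensor([2.1, 0.3, 1.5], requires_grad=True)   # higher pass rate
r_lo = torch.tensor([0.9, -0.2, 1.6])                      # lower pass rate
loss = bradley_terry_loss(r_hi, r_lo)
loss.backward()
print(float(loss), r_hi.grad)
```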
**7. Robust Online Conformal Prediction under Uniform Label Noise**
arXiv:2501.18363 (https://arxiv.org/abs/2501.18363) [pdf, other]
Subjects: cs.LG
Authors: Huajun Xi, Kangdao Liu, Hao Zeng, Wenguang Sun, Hongxin Wei

Abstract: Conformal prediction is an emerging technique for uncertainty quantification that constructs prediction sets guaranteed to contain the true label with a predefined probability. Recent work develops online conformal prediction methods that adaptively construct prediction sets to accommodate distribution shifts. However, existing algorithms typically assume perfect label accuracy, which rarely holds in practice. In this work, we investigate the robustness of online conformal prediction under uniform label noise with a known noise rate, in both constant and dynamic learning rate schedules. We show that label noise causes a persistent gap between the actual mis-coverage rate and the desired rate $\alpha$, leading to either overestimated or underestimated coverage guarantees. To address this issue, we propose Noise Robust Online Conformal Prediction (dubbed NR-OCP), which updates the threshold with a novel robust pinball loss providing an unbiased estimate of the clean pinball loss without requiring ground-truth labels. Our theoretical analysis shows that NR-OCP eliminates the coverage gap in both constant and dynamic learning rate schedules, achieving a convergence rate of $\mathcal{O}(T^{-1/2})$ for both empirical and expected coverage errors under uniform label noise. Extensive experiments demonstrate the effectiveness of our method, achieving both precise coverage and improved efficiency.

Submitted 2 February, 2025; v1 submitted 30 January, 2025; originally announced January 2025.
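The abstract's key ingredient, an unbiased estimate of a clean quantity from noisy labels, can be illustrated for the miscoverage indicator. Under uniform noise, the observed label equals the true one with probability 1 - eps and is otherwise uniform over the other K - 1 classes, so the observed error indicator can be debiased in closed form. The simulation below is a hedged sketch of this idea only, not the paper's exact NR-OCP loss:

```python
import numpy as np

rng = np.random.default_rng(0)
K, eps, alpha, eta, T = 10, 0.2, 0.1, 0.05, 50_000

def debiased_err(obs_err, set_size):
    # With p = eps / (K - 1), the wrong labels falling outside the prediction
    # set number K - set_size - err, so
    #   E[obs_err] = (1 - eps) * err + p * (K - set_size - err).
    # Solving for err gives an unbiased estimate from the observed indicator.
    p = eps / (K - 1)
    return (obs_err - p * (K - set_size)) / (1.0 - eps - p)

q, clean_cov = 1.0, []
for t in range(T):
    probs = rng.dirichlet(np.ones(K))            # toy predictive distribution
    y = rng.choice(K, p=probs)                   # true label
    y_obs = y if rng.random() > eps else rng.choice([c for c in range(K) if c != y])
    pred_set = np.flatnonzero(probs >= 1.0 - q)  # nonconformity = 1 - p(class)
    obs_err = float(y_obs not in pred_set)
    q += eta * (debiased_err(obs_err, pred_set.size) - alpha)  # online update
    clean_cov.append(float(y in pred_set))
print("clean coverage:", np.mean(clean_cov[T // 2:]))  # approaches 1 - alpha
```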
**8. An Automatic Sound and Complete Abstraction Method for Generalized Planning with Baggable Types**
arXiv:2501.15249 (https://arxiv.org/abs/2501.15249) [pdf, other]
Subjects: cs.AI
Authors: Hao Dong, Zheyuan Shi, Hemeng Zeng, Yongmei Liu

Abstract: Generalized planning is concerned with finding a single plan that solves multiple similar planning instances. Abstractions are widely used for solving generalized planning, and QNP (qualitative numeric planning) is a popular abstract model. Recently, Cui et al. showed that a plan solves a sound and complete abstraction of a generalized planning problem if and only if the refined plan solves the original problem. However, existing work on automatic abstraction for generalized planning can hardly guarantee soundness, let alone completeness. In this paper, we propose an automatic sound and complete abstraction method for generalized planning with baggable types. We use a variant of QNP, called bounded QNP (BQNP), where integer variables are increased or decreased by only one. Since BQNP is undecidable, we propose and implement a sound but incomplete solver for BQNP. We present an automatic method to abstract a BQNP problem from a classical planning instance with baggable types. The basic idea is to introduce a counter for each bag of indistinguishable tuples of objects. We define a class of domains called proper baggable domains and show that, for such domains, the BQNP problem obtained by our automatic method is a sound and complete abstraction for a generalized planning problem whose instances share the same bags as the given instance, although the sizes of the bags may differ. Thus, the refined plan of a solution to the BQNP problem is a solution to the generalized planning problem. Finally, we implement our abstraction method, and experiments on a number of domains demonstrate the promise of our approach.

Submitted 29 January, 2025; v1 submitted 25 January, 2025; originally announced January 2025.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00707v1-abstract-full').style.display = 'none'; document.getElementById('2501.00707v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures, 8 tables, accepted by 2025AAAI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20807">arXiv:2412.20807</a> <span> [<a href="https://arxiv.org/pdf/2412.20807">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Two Heads Are Better Than One: Averaging along Fine-Tuning to Improve Targeted Transferability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Hui Zeng</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Sanshuai Cui</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Biwei Chen</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+A">Anjie Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20807v1-abstract-short" style="display: inline;"> With much longer optimization time than that of untargeted attacks notwithstanding, the transferability of targeted attacks is still far from satisfactory. Recent studies reveal that fine-tuning an existing adversarial example (AE) in feature space can efficiently boost its targeted transferability. However, existing fine-tuning schemes only utilize the endpoint and ignore the valuable information… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20807v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20807v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20807v1-abstract-full" style="display: none;"> With much longer optimization time than that of untargeted attacks notwithstanding, the transferability of targeted attacks is still far from satisfactory. Recent studies reveal that fine-tuning an existing adversarial example (AE) in feature space can efficiently boost its targeted transferability. However, existing fine-tuning schemes only utilize the endpoint and ignore the valuable information in the fine-tuning trajectory. Noting that the vanilla fine-tuning trajectory tends to oscillate around the periphery of a flat region of the loss surface, we propose averaging over the fine-tuning trajectory to pull the crafted AE towards a more centered region. We compare the proposed method with existing fine-tuning schemes by integrating them with state-of-the-art targeted attacks in various attacking scenarios. 
**10. Two Heads Are Better Than One: Averaging along Fine-Tuning to Improve Targeted Transferability**
arXiv:2412.20807 (https://arxiv.org/abs/2412.20807) [pdf]
Subjects: cs.CV; cs.AI
Authors: Hui Zeng, Sanshuai Cui, Biwei Chen, Anjie Peng

Abstract: Notwithstanding much longer optimization time than untargeted attacks, the transferability of targeted attacks is still far from satisfactory. Recent studies reveal that fine-tuning an existing adversarial example (AE) in feature space can efficiently boost its targeted transferability. However, existing fine-tuning schemes only utilize the endpoint and ignore the valuable information in the fine-tuning trajectory. Noting that the vanilla fine-tuning trajectory tends to oscillate around the periphery of a flat region of the loss surface, we propose averaging over the fine-tuning trajectory to pull the crafted AE towards a more centered region. We compare the proposed method with existing fine-tuning schemes by integrating them with state-of-the-art targeted attacks in various attacking scenarios. Experimental results uphold the superiority of the proposed method in boosting targeted transferability. The code is available at github.com/zengh5/Avg_FT.

Submitted 30 December, 2024; originally announced December 2024.
Comments: 9 pages, 6 figures; accepted by ICASSP 2025.
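Averaging over the fine-tuning trajectory amounts to returning the running mean of the perturbation iterates instead of the endpoint. A schematic sketch, with the feature-space fine-tuning update abstracted into a caller-supplied step function (names are hypothetical):

```python
import torch

def finetune_with_averaging(x, delta0, step_fn, n_iters=10):
    """Fine-tune an existing adversarial perturbation delta0 and return the
    average of the iterates rather than the endpoint. step_fn performs one
    fine-tuning update on the perturbation (abstracted here)."""
    delta = delta0.clone()
    avg = delta.clone()
    for t in range(1, n_iters + 1):
        delta = step_fn(x, delta)
        avg += (delta - avg) / (t + 1)   # incremental mean over the trajectory
    return x + avg

# Toy usage: a stand-in "fine-tuning" step that adds small noise.
x = torch.rand(1, 3, 32, 32)
d0 = 0.03 * torch.sign(torch.randn_like(x))
adv = finetune_with_averaging(x, d0, lambda x, d: d + 0.001 * torch.randn_like(d))
print(adv.shape)
```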
Specifically, we design a baffle-actuator mechanism to incorporate programmed feedback into the opto-mechanical responsiveness. By simply addressing the baffle position with respect to the incident light beam, positive and negative feedback are programmed. We demonstrate the transformation of a light-bending strip into a switcher, where the intensity of light determines the energy barrier under positive feedback, realizing multi-stable shape-morphing. By leveraging the negative feedback and associated homeostasis, we demonstrate two soft robots, i.e., a locomotor and a swimmer. Furthermore, we unveil the ubiquity of feedback in light-responsive materials, which provides new insight into self-regulated robotic matters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15990v1-abstract-full').style.display = 'none'; document.getElementById('2412.15990v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11736">arXiv:2412.11736</a> <span> [<a href="https://arxiv.org/pdf/2412.11736">pdf</a>, <a href="https://arxiv.org/format/2412.11736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Personalized LLM for Generating Customized Responses to the Same Query from Different Users </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Hang Zeng</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+C">Chaoyue Niu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fan Wu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+C">Chengfei Lv</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+G">Guihai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11736v1-abstract-short" style="display: inline;"> Existing work on large language model (LLM) personalization assigned different responding roles to LLM, but overlooked the diversity of questioners. In this work, we propose a new form of questioner-aware LLM personalization, generating different responses even for the same query from different questioners. We design a dual-tower model architecture with a cross-questioner general encoder and a que… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11736v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11736v1-abstract-full" style="display: none;"> Existing work on large language model (LLM) personalization assigned different responding roles to LLM, but overlooked the diversity of questioners. In this work, we propose a new form of questioner-aware LLM personalization, generating different responses even for the same query from different questioners. 
arXiv:2412.11736 (https://arxiv.org/abs/2412.11736) [cs.CL]
Personalized LLM for Generating Customized Responses to the Same Query from Different Users
Authors: Hang Zeng, Chaoyue Niu, Fan Wu, Chengfei Lv, Guihai Chen
Abstract: Existing work on large language model (LLM) personalization assigns different responding roles to the LLM but overlooks the diversity of questioners. In this work, we propose a new form of questioner-aware LLM personalization, generating different responses even for the same query from different questioners. We design a dual-tower model architecture with a cross-questioner general encoder and a questioner-specific encoder. We further apply contrastive learning with multi-view augmentation, pulling the dialogue representations of the same questioner close while pushing apart those of different questioners. To mitigate the impact of question diversity on questioner-contrastive learning, we cluster the dialogues based on question similarity and restrict the scope of contrastive learning within each cluster. We also build a multi-questioner dataset from English and Chinese scripts and WeChat records, called MQDialog, containing 173 questioners and 12 responders. Extensive evaluation with different metrics shows a significant improvement in the quality of personalized response generation.
Submitted 16 December, 2024; originally announced December 2024.
Comments: 9 pages.

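The cluster-restricted contrastive objective can be sketched as an InfoNCE loss in which only dialogues from the same question cluster interact; the batch composition, temperature, and L2 normalization below are illustrative assumptions, not the paper's exact formulation.

```python
# Sketch of questioner-contrastive learning restricted to question clusters
# (assumes each batch contains >= 2 dialogues per questioner per cluster).
import torch
import torch.nn.functional as F

def cluster_infonce(emb, questioner_ids, cluster_ids, tau=0.1):
    """emb: (N, d) dialogue representations; positives share questioner_id,
    and only pairs within the same cluster_id participate in the loss."""
    emb = F.normalize(emb, dim=-1)
    sim = emb @ emb.T / tau                                    # (N, N) similarities
    same_cluster = cluster_ids[:, None] == cluster_ids[None, :]
    same_quest = questioner_ids[:, None] == questioner_ids[None, :]
    eye = torch.eye(len(emb), dtype=torch.bool, device=emb.device)
    sim = sim.masked_fill(~same_cluster | eye, float("-inf"))  # stay inside cluster
    pos = same_quest & same_cluster & ~eye                     # positive pairs
    log_prob = sim - sim.logsumexp(dim=1, keepdim=True)
    return -(log_prob[pos]).mean()                             # average over positives
```
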
arXiv:2412.11165 (https://arxiv.org/abs/2412.11165) [cs.CV, cs.LG]
OTLRM: Orthogonal Learning-based Low-Rank Metric for Multi-Dimensional Inverse Problems
Authors: Xiangming Wang, Haijin Zeng, Jiaoyang Chen, Sheng Liu, Yongyong Chen, Guoqing Chao
Abstract: In real-world scenarios, complex data such as multispectral images and multi-frame videos inherently exhibit a robust low-rank property. This property is vital for multi-dimensional inverse problems, such as tensor completion, spectral imaging reconstruction, and multispectral image denoising. Existing tensor singular value decomposition (t-SVD) definitions rely on hand-designed or pre-given transforms, which lack flexibility for defining the tensor nuclear norm (TNN). The TNN-regularized optimization problem is solved by the singular value thresholding (SVT) operator, which leverages the t-SVD framework to obtain the low-rank tensor. However, introducing SVT into deep neural networks is complicated by the numerical instability of differentiating through eigenvectors. In this paper, we introduce a novel data-driven generative low-rank t-SVD model based on a learnable orthogonal transform, which can be naturally solved under its representation. Prompted by the linear-algebra theorem on Householder transformations, our learnable orthogonal transform is achieved by constructing an endogenously orthogonal matrix adaptable to neural networks, optimized as an arbitrary orthogonal matrix. Additionally, we propose a low-rank solver as a generalization of SVT, which utilizes an efficient representation of generative networks to obtain low-rank structures. Extensive experiments highlight significant restoration enhancements.
Submitted 15 December, 2024; originally announced December 2024.
Comments: AAAI 2025.

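An "endogenously orthogonal" learnable matrix is commonly built as a product of Householder reflections, which stays exactly orthogonal under unconstrained gradient updates; whether this matches the authors' construction beyond the Householder theorem the abstract cites is an assumption.

```python
# Minimal sketch: parameterize an orthogonal matrix as a product of Householder
# reflections H_i = I - 2 v_i v_i^T (unit v_i); products of reflections are
# orthogonal by construction, so gradient descent on the unconstrained v_i is safe.
import torch
import torch.nn as nn

class HouseholderOrthogonal(nn.Module):
    def __init__(self, n, num_reflections=None):
        super().__init__()
        k = num_reflections or n                       # number of reflections (assumed)
        self.v = nn.Parameter(torch.randn(k, n))

    def forward(self):
        q = torch.eye(self.v.shape[1], device=self.v.device)
        for v in self.v:
            v = v / v.norm()
            q = q - 2.0 * torch.outer(v, v @ q)        # left-apply H_i to the product
        return q

Q = HouseholderOrthogonal(8)()
print(torch.allclose(Q @ Q.T, torch.eye(8), atol=1e-5))  # True: Q is orthogonal
```
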
arXiv:2412.05557 (https://arxiv.org/abs/2412.05557) [cs.CV]
CoE: Deep Coupled Embedding for Non-Rigid Point Cloud Correspondences
Authors: Huajian Zeng, Maolin Gao, Daniel Cremers
Abstract: The interest in matching non-rigidly deformed shapes represented as raw point clouds is rising due to the proliferation of low-cost 3D sensors. Yet, the task is challenging, since point clouds are irregular and lack intrinsic shape information. We propose to tackle these challenges by learning a new shape representation: a per-point high-dimensional embedding, in an embedding space where semantically similar points share similar embeddings. The learned embedding has multiple beneficial properties: it is aware of the underlying shape geometry and is robust to shape deformations and various shape artefacts, such as noise and partiality. Consequently, this embedding can be directly employed to retrieve high-quality dense correspondences through a simple nearest-neighbor search in the embedding space. Extensive experiments demonstrate new state-of-the-art results and robustness on numerous challenging non-rigid shape matching benchmarks, and show great potential in other shape analysis tasks, such as segmentation.
Submitted 9 January, 2025; v1 submitted 7 December, 2024; originally announced December 2024.
Comments: 16 pages, 17 figures.

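Given trained per-point embeddings, the correspondence step the abstract describes is literally a nearest-neighbor query between the two embedding sets; the brute-force search below is an illustrative stand-in for whatever accelerated search an implementation would use.

```python
# Sketch: dense correspondences via nearest-neighbor search in embedding space.
# emb_x, emb_y would come from the trained embedding network (assumed here).
import torch

def dense_correspondences(emb_x, emb_y):
    """emb_x: (Nx, d), emb_y: (Ny, d). For every point of shape X, return the
    index of its nearest point on shape Y in the embedding space."""
    dists = torch.cdist(emb_x, emb_y)      # (Nx, Ny) pairwise distances
    return dists.argmin(dim=1)             # (Nx,) matched indices into Y

emb_x, emb_y = torch.randn(1000, 128), torch.randn(1200, 128)  # dummy embeddings
print(dense_correspondences(emb_x, emb_y).shape)               # torch.Size([1000])
```
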
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05557v2-abstract-full').style.display = 'none'; document.getElementById('2412.05557v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01785">arXiv:2411.01785</a> <span> [<a href="https://arxiv.org/pdf/2411.01785">pdf</a>, <a href="https://arxiv.org/format/2411.01785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Transferable Sequential Recommendation via Vector Quantized Meta Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yue%2C+Z">Zhenrui Yue</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Huimin Zeng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a>, <a href="/search/cs?searchtype=author&query=McAuley%2C+J">Julian McAuley</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01785v1-abstract-short" style="display: inline;"> While sequential recommendation achieves significant progress on capturing user-item transition patterns, transferring such large-scale recommender systems remains challenging due to the disjoint user and item groups across domains. In this paper, we propose a vector quantized meta learning for transferable sequential recommenders (MetaRec). Without requiring additional modalities or shared inform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01785v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01785v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01785v1-abstract-full" style="display: none;"> While sequential recommendation achieves significant progress on capturing user-item transition patterns, transferring such large-scale recommender systems remains challenging due to the disjoint user and item groups across domains. In this paper, we propose a vector quantized meta learning for transferable sequential recommenders (MetaRec). Without requiring additional modalities or shared information across domains, our approach leverages user-item interactions from multiple source domains to improve the target domain performance. To solve the input heterogeneity issue, we adopt vector quantization that maps item embeddings from heterogeneous input spaces to a shared feature space. 
arXiv:2410.21966 (https://arxiv.org/abs/2410.21966) [cs.CV]
PrefPaint: Aligning Image Inpainting Diffusion Model with Human Preference
Authors: Kendong Liu, Zhiyu Zhu, Chuanhao Li, Hui Liu, Huanqiang Zeng, Junhui Hou
Abstract: In this paper, we make the first attempt to align diffusion models for image inpainting with human aesthetic standards via a reinforcement learning framework, significantly improving the quality and visual appeal of inpainted images.
Specifically, instead of directly measuring the divergence with paired images, we train a reward model on a dataset we construct, consisting of nearly 51,000 images annotated with human preferences. We then adopt a reinforcement learning process to fine-tune the distribution of a pre-trained diffusion model for image inpainting in the direction of higher reward. Moreover, we theoretically deduce an upper bound on the error of the reward model, which indicates the confidence of reward estimation throughout the reinforcement alignment process, thereby facilitating accurate regularization. Extensive experiments on inpainting comparison and downstream tasks, such as image extension and 3D reconstruction, demonstrate the effectiveness of our approach, showing significant improvements in the alignment of inpainted images with human preference compared with state-of-the-art methods. This research not only advances the field of image inpainting but also provides a framework for incorporating human preference into the iterative refinement of generative models based on modeling reward accuracy, with broad implications for the design of visually driven AI applications. Our code and dataset are publicly available at https://prefpaint.github.io.
Submitted 2 November, 2024; v1 submitted 29 October, 2024; originally announced October 2024.

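The reward-model stage can be sketched generically: a scalar reward network is trained so that preferred inpaintings score higher. Whether PrefPaint uses pairwise comparisons or scalar ratings is not stated in the abstract, so the Bradley-Terry-style pairwise loss below is a standard assumption rather than the paper's objective.

```python
# Generic reward-model training step for preference alignment (assumption:
# pairwise labels "image A preferred over image B"; Bradley-Terry loss).
import torch.nn.functional as F

def reward_loss(reward_model, img_preferred, img_rejected):
    """Encourage r(preferred) > r(rejected); standard RLHF-style objective."""
    r_pos = reward_model(img_preferred)          # (B,) scalar rewards
    r_neg = reward_model(img_rejected)
    return -F.logsigmoid(r_pos - r_neg).mean()
```
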
arXiv:2410.19665 (https://arxiv.org/abs/2410.19665) [cs.LG, cs.CR, cs.GT]
MetaTrading: An Immersion-Aware Model Trading Framework for Vehicular Metaverse Services
Authors: Hongjia Wu, Hui Zeng, Zehui Xiong, Jiawen Kang, Zhiping Cai, Tse-Tin Chan, Dusit Niyato, Zhu Han
Abstract: Updates of extensive Internet of Things (IoT) data are critical to the immersion of vehicular metaverse services. However, providing high-quality and sustainable data in unstable and resource-constrained vehicular networks remains a significant challenge. To address this problem, we put forth a novel immersion-aware model trading framework that incentivizes metaverse users (MUs) to contribute learning models trained on their latest local data for augmented reality (AR) services in the vehicular metaverse, while preserving their privacy through federated learning. To comprehensively evaluate the contribution of locally trained learning models provided by MUs to AR services, we design a new immersion metric that captures service immersion by considering the freshness and accuracy of learning models, as well as the amount and potential value of the raw data used for training. We model the trading interactions between metaverse service providers (MSPs) and MUs as an equilibrium problem with equilibrium constraints (EPEC) to analyze and balance their costs and gains.
Moreover, considering dynamic network conditions and privacy concerns, we formulate the reward decisions of MSPs as a multi-agent Markov decision process. We then present a fully distributed dynamic reward method based on deep reinforcement learning, which operates without any private information about MUs or other MSPs. Experimental results demonstrate that, compared to benchmark schemes, the proposed framework effectively provides higher-value models for object detection and classification in AR services on real AR-related vehicle datasets.
Submitted 25 October, 2024; originally announced October 2024.

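The abstract lists the ingredients of the immersion metric (model freshness, model accuracy, amount and potential value of training data) but not its functional form; the score below is a purely hypothetical combination, with invented weights and an exponential freshness decay, intended only to make the idea concrete.

```python
# Hypothetical immersion score combining the factors the abstract lists;
# the functional form, weights, and half-life are illustrative assumptions,
# not the paper's metric.
import math

def immersion_score(age_s, accuracy, n_samples, data_value,
                    w=(0.4, 0.3, 0.2, 0.1), half_life_s=600.0):
    freshness = math.exp(-math.log(2) * age_s / half_life_s)  # decays with model age
    volume = math.log1p(n_samples) / 10.0                     # diminishing returns
    parts = (freshness, accuracy, volume, data_value)
    return sum(wi * p for wi, p in zip(w, parts))

print(round(immersion_score(age_s=120, accuracy=0.9, n_samples=5000, data_value=0.7), 3))
```
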
arXiv:2410.14214 (https://arxiv.org/abs/2410.14214) [cs.CV, eess.IV]
MambaSCI: Efficient Mamba-UNet for Quad-Bayer Patterned Video Snapshot Compressive Imaging
Authors: Zhenghao Pan, Haijin Zeng, Jiezhang Cao, Yongyong Chen, Kai Zhang, Yong Xu
Abstract: Color video snapshot compressive imaging (SCI) employs computational imaging techniques to capture multiple sequential video frames in a single Bayer-patterned measurement. With the increasing popularity of the quad-Bayer pattern in mainstream smartphone cameras for capturing high-resolution videos, mobile photography has become accessible to a wider audience. However, existing color video SCI reconstruction algorithms are designed for the traditional Bayer pattern. When applied to videos captured by quad-Bayer cameras, these algorithms often result in color distortion and ineffective demosaicing, rendering them impractical for primary equipment. To address this challenge, we propose MambaSCI, which leverages the Mamba and UNet architectures for efficient reconstruction of quad-Bayer patterned color video SCI. To the best of our knowledge, our work presents the first algorithm for quad-Bayer patterned SCI reconstruction, and the first application of the Mamba model to this task. Specifically, we customize Residual-Mamba-Blocks, which residually connect a Spatial-Temporal Mamba (STMamba) module, an Edge-Detail-Reconstruction (EDR) module, and a Channel Attention (CA) module. STMamba models long-range spatial-temporal dependencies with linear complexity, EDR improves edge-detail reconstruction, and CA compensates for the missing channel-information interaction in the Mamba model. Experiments demonstrate that MambaSCI surpasses state-of-the-art methods with lower computational and memory costs. PyTorch-style pseudo-code for the core modules is provided in the supplementary materials.
Submitted 18 October, 2024; originally announced October 2024.
Comments: NeurIPS 2024.

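The block structure can be sketched from the description alone: three residually connected modules. STMamba and EDR are left as injected placeholders since their internals are not given here, and the squeeze-and-excitation-style channel attention is a generic stand-in rather than the paper's exact CA design.

```python
# Skeleton of the described residual composition (illustrative; STMamba and
# EDR are placeholder submodules supplied by the caller).
import torch
import torch.nn as nn

class ChannelAttention(nn.Module):
    def __init__(self, c, r=4):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(c, c // r), nn.ReLU(),
                                 nn.Linear(c // r, c), nn.Sigmoid())

    def forward(self, x):                      # x: (B, C, H, W)
        w = self.mlp(x.mean(dim=(2, 3)))       # squeeze -> per-channel weights
        return x * w[:, :, None, None]         # reweight channel interactions

class ResidualMambaBlock(nn.Module):
    def __init__(self, c, st_mamba, edr):
        super().__init__()
        self.st_mamba, self.edr, self.ca = st_mamba, edr, ChannelAttention(c)

    def forward(self, x):
        x = x + self.st_mamba(x)               # long-range spatial-temporal mixing
        x = x + self.edr(x)                    # edge-detail reconstruction
        return x + self.ca(x)                  # channel attention, residual
```
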
arXiv:2410.12811 (https://arxiv.org/abs/2410.12811) [cs.CV, cs.SD, eess.AS]
Decoding Emotions: Unveiling Facial Expressions through Acoustic Sensing with Contrastive Attention
Authors: Guangjing Wang, Juexing Wang, Ce Zhou, Weikang Ding, Huacheng Zeng, Tianxing Li, Qiben Yan
Abstract: Expression recognition holds great promise for applications such as content recommendation and mental healthcare by accurately detecting users' emotional states. Traditional methods often rely on cameras or wearable sensors, which raise privacy concerns and add extra device burdens. In addition, existing acoustic-based methods struggle to maintain satisfactory performance when there is a distribution shift between the training and inference datasets. In this paper, we introduce FacER+, an active acoustic facial expression recognition system that eliminates the requirement for external microphone arrays. FacER+ extracts facial expression features by analyzing the echoes of near-ultrasound signals emitted between the 3D facial contour and the earpiece speaker of a smartphone. This approach not only reduces background noise but also enables the identification of different expressions from various users with minimal training data. We develop a contrastive external attention-based model to learn consistent expression features across different users, reducing the distribution differences. Extensive experiments involving 20 volunteers, both with and without masks, demonstrate that FacER+ can accurately recognize six common facial expressions with over 90% accuracy in diverse, user-independent real-life scenarios, surpassing the leading acoustic sensing methods by 10%. FacER+ offers a robust and practical solution for facial expression recognition.
Submitted 30 September, 2024; originally announced October 2024.
Comments: Extended version of the 2023 IEEE INFOCOM conference paper.

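External attention replaces sample-internal attention with small learned memories shared across all inputs, which is plausibly why it helps align features across users; the two-memory formulation below follows the generic external-attention design and is an assumption, not a detail confirmed by the abstract.

```python
# Generic external-attention layer: learned memories M_k, M_v are shared
# across samples (sizes are illustrative assumptions).
import torch
import torch.nn as nn
import torch.nn.functional as F

class ExternalAttention(nn.Module):
    def __init__(self, dim, mem_size=64):
        super().__init__()
        self.mk = nn.Linear(dim, mem_size, bias=False)   # memory of keys
        self.mv = nn.Linear(mem_size, dim, bias=False)   # memory of values

    def forward(self, x):                                # x: (B, N, dim)
        attn = F.softmax(self.mk(x), dim=-1)             # attend over memory slots
        attn = attn / (attn.sum(dim=1, keepdim=True) + 1e-9)  # double normalization
        return self.mv(attn)                             # (B, N, dim) features
```
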
arXiv:2410.11718 (https://arxiv.org/abs/2410.11718) [cs.CL]
Converging to a Lingua Franca: Evolution of Linguistic Regions and Semantics Alignment in Multilingual Large Language Models
Authors: Hongchuan Zeng, Senyu Han, Lu Chen, Kai Yu
Abstract: Large language models (LLMs) have demonstrated remarkable performance, particularly in multilingual contexts. While recent studies suggest that LLMs can transfer skills learned in one language to others, the internal mechanisms behind this ability remain unclear. We observed that the neuron activation patterns of LLMs exhibit similarities when processing the same language, revealing the existence and location of key linguistic regions. Additionally, we found that neuron activation patterns are similar when processing sentences with the same semantic meaning in different languages. This indicates that LLMs map semantically identical inputs from different languages into a "Lingua Franca", a common semantic latent space that allows for consistent processing across languages. This semantic alignment becomes more pronounced with training and increased model size, resulting in a more language-agnostic activation pattern. Moreover, we found that key linguistic neurons are concentrated in the first and last layers of LLMs, becoming denser in the first layers as training progresses. Experiments on BLOOM and LLaMA2 support these findings, highlighting the structural evolution of multilingual LLMs during training and scaling up. This paper provides insights into the internal workings of LLMs, offering a foundation for future improvements in their cross-lingual capabilities.
Submitted 15 October, 2024; originally announced October 2024.
Comments: 16 pages, 11 figures, 4 tables.

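One simple way to operationalize "similar activation patterns for translations" is to pool per-neuron activations over tokens and compare the resulting profiles across languages; the mean pooling and cosine similarity below are assumed for illustration, not taken from the paper's protocol.

```python
# Sketch: compare neuron activation patterns for a parallel sentence pair.
import torch

def activation_pattern(hidden_states):
    """hidden_states: (T, d_hidden) activations for one sentence; average over
    tokens to get one per-neuron activation profile."""
    return hidden_states.mean(dim=0)

def cross_lingual_similarity(acts_a, acts_b):
    p, q = activation_pattern(acts_a), activation_pattern(acts_b)
    return torch.cosine_similarity(p, q, dim=0).item()

# Dummy tensors stand in for layer activations; a semantically identical
# translation pair would be expected to score high.
print(cross_lingual_similarity(torch.randn(12, 4096), torch.randn(9, 4096)))
```
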
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11718v1-abstract-full').style.display = 'none'; document.getElementById('2410.11718v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 11 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11651">arXiv:2410.11651</a> <span> [<a href="https://arxiv.org/pdf/2410.11651">pdf</a>, <a href="https://arxiv.org/format/2410.11651">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RS-MOCO: A deep learning-based topology-preserving image registration method for cardiac T1 mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chiyi Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Longwei Sun</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+D">Dong Liang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Haifeng Liang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Hongwu Zeng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yanjie Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11651v1-abstract-short" style="display: inline;"> Cardiac T1 mapping can evaluate various clinical symptoms of myocardial tissue. However, there is currently a lack of effective, robust, and efficient methods for motion correction in cardiac T1 mapping. In this paper, we propose a deep learning-based and topology-preserving image registration framework for motion correction in cardiac T1 mapping. Notably, our proposed implicit consistency constra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11651v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11651v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11651v1-abstract-full" style="display: none;"> Cardiac T1 mapping can evaluate various clinical symptoms of myocardial tissue. However, there is currently a lack of effective, robust, and efficient methods for motion correction in cardiac T1 mapping. In this paper, we propose a deep learning-based and topology-preserving image registration framework for motion correction in cardiac T1 mapping. Notably, our proposed implicit consistency constraint dubbed BLOC, to some extent preserves the image topology in registration by bidirectional consistency constraint and local anti-folding constraint. 
To address the contrast variation issue, we introduce a weighted image similarity metric for multi-modal registration of cardiac T1-weighted images. In addition, a semi-supervised myocardium segmentation network and a dual-domain attention module are integrated into the framework to further improve registration performance. Numerous comparative experiments and ablation studies demonstrate the effectiveness and high robustness of our method. The results also indicate that the proposed weighted image similarity metric, specifically crafted for our network, contributes substantially to the motion correction efficacy, while the bidirectional consistency constraint combined with the local anti-folding constraint ensures a more desirable topology-preserving registration mapping.
Submitted 15 October, 2024; originally announced October 2024.

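The two topology terms admit a direct sketch: a cycle term that composes the forward and backward displacement fields, and an anti-folding term penalizing negative Jacobian determinants. The 2D finite-difference Jacobian is a common approximation; the exact weighting and the `warp` operator are assumptions.

```python
# Sketch of the two topology-preserving terms (2D; illustrative assumptions).
import torch

def cycle_consistency_loss(flow_ab, flow_ba, warp):
    """Bidirectional consistency: u_ab(x) + u_ba(x + u_ab(x)) should vanish.
    `warp(field, disp)` resamples `field` at x + disp (e.g., via grid_sample)."""
    residual = flow_ab + warp(flow_ba, flow_ab)      # compose the two fields
    return residual.abs().mean()

def anti_folding_loss(flow):
    """Penalize locations where det J of (id + flow) is negative, i.e., where
    the deformation folds. flow: (B, 2, H, W) displacement field."""
    du_dx = flow[:, :, :, 1:] - flow[:, :, :, :-1]   # d(flow)/dx
    du_dy = flow[:, :, 1:, :] - flow[:, :, :-1, :]   # d(flow)/dy
    du_dx, du_dy = du_dx[:, :, 1:, :], du_dy[:, :, :, 1:]  # align shapes
    jac = ((1 + du_dx[:, 0]) * (1 + du_dy[:, 1])
           - du_dx[:, 1] * du_dy[:, 0])              # det of the 2x2 Jacobian
    return torch.relu(-jac).mean()                   # only negative determinants
```
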
arXiv:2410.09408 (https://arxiv.org/abs/2410.09408) [cs.LG]
C-Adapter: Adapting Deep Classifiers for Efficient Conformal Prediction Sets
Authors: Kangdao Liu, Hao Zeng, Jianguo Huang, Huiping Zhuang, Chi-Man Vong, Hongxin Wei
Abstract: Conformal prediction, as an emerging uncertainty quantification technique, typically functions as post-hoc processing for the outputs of trained classifiers. To optimize the classifier for maximum predictive efficiency, Conformal Training rectifies the training objective with a regularization term that minimizes the average prediction-set size at a specific error rate. However, the regularization term inevitably deteriorates classification accuracy and leads to suboptimal efficiency of conformal predictors. To address this issue, we introduce Conformal Adapter (C-Adapter), an adapter-based tuning method that enhances the efficiency of conformal predictors without sacrificing accuracy. In particular, we implement the adapter as a class of intra-order-preserving functions and tune it with a proposed loss that maximizes the discriminability of non-conformity scores between correctly and randomly matched data-label pairs. Using C-Adapter, the model tends to produce extremely high non-conformity scores for incorrect labels, thereby enhancing the efficiency of prediction sets across different coverage rates. Extensive experiments demonstrate that C-Adapter can effectively adapt various classifiers for efficient prediction sets and enhance the conformal training method.
Submitted 15 February, 2025; v1 submitted 12 October, 2024; originally announced October 2024.

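For context, the "efficiency" being optimized is the average size of conformal prediction sets. The split-conformal construction below, with a 1 - softmax-probability score, is the standard baseline whose scores an adapter like C-Adapter would reshape; the score choice is an assumption, and a reasonably large calibration set is assumed so the quantile level stays below 1.

```python
# Split conformal prediction with a 1 - softmax-probability non-conformity score.
import numpy as np

def conformal_sets(cal_probs, cal_labels, test_probs, alpha=0.1):
    """cal_probs: (n, K) calibration softmax outputs; returns a boolean (m, K)
    membership mask with ~(1 - alpha) marginal coverage."""
    n = len(cal_labels)
    scores = 1.0 - cal_probs[np.arange(n), cal_labels]        # non-conformity
    level = min(1.0, np.ceil((n + 1) * (1 - alpha)) / n)      # finite-sample correction
    q = np.quantile(scores, level, method="higher")
    return (1.0 - test_probs) <= q                            # per-class inclusion

# Efficiency = prediction_sets.sum(axis=1).mean(), i.e., the average set size.
```
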
arXiv:2410.04811 (https://arxiv.org/abs/2410.04811) [cs.CV]
Learning Efficient and Effective Trajectories for Differential Equation-based Image Restoration
Authors: Zhiyu Zhu, Jinhui Hou, Hui Liu, Huanqiang Zeng, Junhui Hou
Abstract: The differential equation-based image restoration approach aims to establish learnable trajectories connecting high-quality images to a tractable distribution, e.g., low-quality images or a Gaussian distribution. In this paper, we reformulate the trajectory optimization of this kind of method, focusing on enhancing both reconstruction quality and efficiency. Initially, we navigate effective restoration paths through a reinforcement learning process, gradually steering potential trajectories toward the most precise options. Additionally, to mitigate the considerable computational burden associated with iterative sampling, we propose cost-aware trajectory distillation to streamline complex paths into several manageable steps with adaptable sizes. Moreover, we fine-tune a foundational diffusion model (FLUX) with 12B parameters using our algorithms, producing a unified framework for handling 7 kinds of image restoration tasks. Extensive experiments showcase the significant superiority of the proposed method, achieving a maximum PSNR improvement of 2.1 dB over state-of-the-art methods, while also greatly enhancing visual perceptual quality. Project page: https://zhu-zhiyu.github.io/FLUX-IR/.
Submitted 7 October, 2024; originally announced October 2024.

arXiv:2410.04343 (https://arxiv.org/abs/2410.04343) [cs.CL]
Inference Scaling for Long-Context Retrieval Augmented Generation
Authors: Zhenrui Yue, Honglei Zhuang, Aijun Bai, Kai Hui, Rolf Jagerman, Hansi Zeng, Zhen Qin, Dong Wang, Xuanhui Wang, Michael Bendersky
Abstract: The scaling of inference computation has unlocked the potential of long-context large language models (LLMs) across diverse settings. For knowledge-intensive tasks, the increased compute is often allocated to incorporate more external knowledge. However, without effectively utilizing such knowledge, solely expanding context does not always enhance performance. In this work, we investigate inference scaling for retrieval augmented generation (RAG), exploring strategies beyond simply increasing the quantity of knowledge. We focus on two inference scaling strategies: in-context learning and iterative prompting. These strategies provide additional flexibility to scale test-time computation (e.g., by increasing retrieved documents or generation steps), thereby enhancing LLMs' ability to effectively acquire and utilize contextual information. We address two key questions: (1) How does RAG performance benefit from the scaling of inference computation when optimally configured? (2) Can we predict the optimal test-time compute allocation for a given budget by modeling the relationship between RAG performance and inference parameters?
Our observations reveal that increasing inference computation leads to nearly linear gains in RAG performance when optimally allocated, a relationship we describe as the inference scaling laws for RAG. Building on this, we further develop a computation allocation model to estimate RAG performance across different inference configurations. The model predicts optimal inference parameters under various computation constraints, which align closely with the experimental results. By applying these optimal configurations, we demonstrate that scaling inference compute on long-context LLMs achieves up to 58.9% gains on benchmark datasets compared to standard RAG.
Submitted 5 October, 2024; originally announced October 2024.

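A minimal stand-in for the computation allocation model is a budget-constrained search over inference parameters using a fitted performance surrogate. The cost constants and log-shaped surrogate below are invented for illustration; the paper fits its allocation model to measured RAG performance rather than assuming a form.

```python
# Toy compute-allocation search for RAG (hypothetical cost/performance models).
import itertools
import math

def cost(num_docs, gen_steps):
    return 1000 * num_docs + 4000 * gen_steps          # assumed token cost

def predicted_perf(num_docs, gen_steps):
    # Stand-in surrogate: diminishing returns in both knobs.
    return 0.5 * math.log(1 + num_docs) + 0.3 * math.log(1 + gen_steps)

def best_allocation(budget):
    grid = itertools.product([1, 2, 4, 8, 16, 32], [1, 2, 4, 8])
    feasible = [(d, s) for d, s in grid if cost(d, s) <= budget]
    return max(feasible, key=lambda ds: predicted_perf(*ds))

print(best_allocation(budget=40_000))   # (32, 2) under this toy model
```
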
arXiv:2410.02296 (https://arxiv.org/abs/2410.02296) [cs.CL]
How to Make LLMs Strong Node Classifiers?
Authors: Zhe Xu, Kaveh Hassani, Si Zhang, Hanqing Zeng, Michihiro Yasunaga, Limei Wang, Dongqi Fu, Ning Yao, Bo Long, Hanghang Tong
Abstract: Language Models (LMs) are increasingly challenging the dominance of domain-specific models, such as Graph Neural Networks (GNNs) and Graph Transformers (GTs), in graph learning tasks. Following this trend, we propose a novel approach that empowers off-the-shelf LMs to achieve performance comparable to state-of-the-art (SOTA) GNNs on node classification tasks, without requiring any architectural modification. By preserving the LM's original architecture, our approach retains a key benefit of LM instruction tuning: the ability to jointly train on diverse datasets, fostering greater flexibility and efficiency. To achieve this, we introduce two key augmentation strategies: (1) enriching the LM's input using topological and semantic retrieval methods, which provide richer contextual information, and (2) guiding the LM's classification process through a lightweight GNN classifier that effectively prunes class candidates. Our experiments on real-world datasets show that backbone Flan-T5 LMs equipped with these augmentation strategies outperform SOTA text-output node classifiers and are comparable to top-performing vector-output node classifiers. By bridging the gap between specialized node classifiers and general LMs, this work paves the way for more versatile and widely applicable graph learning models. We will open-source the code upon publication.
Submitted 31 January, 2025; v1 submitted 3 October, 2024; originally announced October 2024.

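The second augmentation strategy can be sketched as candidate pruning: a lightweight GNN scores all classes and only the top-k survive into the LM's prompt. The prompt template, the value of k, and the `gnn_logits` input are hypothetical names introduced here for illustration.

```python
# Sketch of GNN-guided candidate pruning for an LM node classifier
# (prompt wording, k, and the gnn_logits helper are hypothetical).
import torch

def build_prompt(node_text, neighbor_texts, gnn_logits, class_names, k=5):
    topk = torch.topk(gnn_logits, k).indices.tolist()      # GNN prunes candidates
    candidates = ", ".join(class_names[i] for i in topk)
    context = " ".join(neighbor_texts[:3])                 # retrieved neighbors
    return (f"Node: {node_text}\nContext from neighbors: {context}\n"
            f"Choose the node's category from: {candidates}\nAnswer:")
```
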
arXiv:2410.01384 (https://arxiv.org/abs/2410.01384) [cs.HC] DOI: 10.1109/TVCG.2024.3456392
CSLens: Towards Better Deploying Charging Stations via Visual Analytics -- A Coupled Networks Perspective
Authors: Yutian Zhang, Liwen Xu, Shaocong Tao, Quanxue Guan, Quan Li, Haipeng Zeng
Abstract: In recent years, the global adoption of electric vehicles (EVs) has surged, prompting a corresponding rise in the installation of charging stations. This proliferation has underscored the importance of expediting the deployment of charging infrastructure, and both academia and industry have therefore devoted effort to the charging station location problem (CSLP) to streamline this process. However, prevailing algorithms addressing the CSLP are hampered by restrictive assumptions and computational overhead, leading to a dearth of comprehensive evaluations in the spatiotemporal dimensions and restricting their practical viability. Moreover, the placement of charging stations exerts a significant impact on both the road network and the power grid, which necessitates holistically evaluating the potential post-deployment impacts on these interconnected networks. In this study, we propose CSLens, a visual analytics system designed to inform charging station deployment decisions through the lens of coupled transportation and power networks.
CSLens offers multiple visualizations and interactive features, empowering users to delve into the existing charging station layout, explore alternative deployment solutions, and assess the ensuring impact. To validate the efficacy of CSLens, we conducted two case studies and engaged in interviews with domain experts. Through these efforts, we substantiated the usability and practical utility of CSLens in enhancing the decision-making process surrounding charging station deployment. Our findings underscore CSLens's potential to serve as a valuable asset in navigating the complexities of charging infrastructure planning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01384v1-abstract-full').style.display = 'none'; document.getElementById('2410.01384v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures; Accepted by IEEE IEEE Transactions on Visualization and Computer Graphics, 2024 (TVCG)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01364">arXiv:2410.01364</a> <span> [<a href="https://arxiv.org/pdf/2410.01364">pdf</a>, <a href="https://arxiv.org/format/2410.01364">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVCG.2024.3392587">10.1109/TVCG.2024.3392587 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MARLens: Understanding Multi-agent Reinforcement Learning for Traffic Signal Control via Visual Analytics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yutian Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+G">Guohong Zheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Quan Li</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Haipeng Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01364v1-abstract-short" style="display: inline;"> The issue of traffic congestion poses a significant obstacle to the development of global cities. One promising solution to tackle this problem is intelligent traffic signal control (TSC). Recently, TSC strategies leveraging reinforcement learning (RL) have garnered attention among researchers. 
arXiv:2410.01364 (cs.HC; doi:10.1109/TVCG.2024.3392587)
MARLens: Understanding Multi-agent Reinforcement Learning for Traffic Signal Control via Visual Analytics
Authors: Yutian Zhang, Guohong Zheng, Zhiyuan Liu, Quan Li, Haipeng Zeng
Abstract: The issue of traffic congestion poses a significant obstacle to the development of global cities. One promising solution is intelligent traffic signal control (TSC), and TSC strategies leveraging reinforcement learning (RL) have recently garnered attention among researchers. However, the evaluation of these models has primarily relied on fixed metrics like reward and queue length. This limited evaluation approach provides only a narrow view of a model's decision-making process, impeding practical implementation. Moreover, effective TSC necessitates coordinated actions across multiple intersections, and existing visual analysis solutions fall short when applied in multi-agent settings. In this study, we delve into the challenge of interpretability in multi-agent reinforcement learning (MARL), particularly within the context of TSC, and propose MARLens, a visual analytics system tailored to understanding MARL-based TSC. Our system serves as a versatile platform for both RL and TSC researchers, empowering them to explore a model's features from various perspectives, revealing its decision-making processes, and shedding light on interactions among different agents. To facilitate quick identification of critical states, we devised multiple visualization views, complemented by a traffic simulation module that allows users to replay specific training scenarios. To validate the utility of the proposed system, we present three comprehensive case studies, incorporate insights from domain experts through interviews, and conduct a user study. These collective efforts underscore the feasibility and effectiveness of MARLens in enhancing our understanding of MARL-based TSC systems and pave the way for more informed and efficient traffic management strategies.
Submitted 2 October, 2024; originally announced October 2024.
Comments: 16 pages, 8 figures; accepted by IEEE Transactions on Visualization and Computer Graphics, 2024
arXiv:2409.17500 (cs.AI, eess.SY, math.OC)
GLinSAT: The General Linear Satisfiability Neural Network Layer By Accelerated Gradient Descent
Authors: Hongtai Zeng, Chao Yang, Yanzhen Zhou, Cheng Yang, Qinglai Guo
Abstract: Ensuring that the outputs of neural networks satisfy specific constraints is crucial for applying neural networks to real-life decision-making problems. In this paper, we consider making a batch of neural network outputs satisfy bounded and general linear constraints. We first reformulate the neural network output projection problem as an entropy-regularized linear programming problem and show that, by the duality theorem, it can be equivalently transformed into an unconstrained convex optimization problem with a Lipschitz continuous gradient. Then, based on an accelerated gradient descent algorithm with numerical performance enhancement, we present our architecture, GLinSAT, to solve the problem. To the best of our knowledge, this is the first general linear satisfiability layer in which all the operations are differentiable and matrix-factorization-free. Although backpropagation can be performed explicitly through the automatic differentiation mechanism, GLinSAT also provides an alternative that calculates the derivatives through implicit differentiation of the optimality condition. Experimental results on constrained traveling salesman problems, partial graph matching with outliers, predictive portfolio allocation, and power system unit commitment demonstrate the advantages of GLinSAT over existing satisfiability layers. Our implementation is available at https://github.com/HunterTracer/GLinSAT.
Submitted 11 November, 2024; v1 submitted 25 September, 2024; originally announced September 2024.
Comments: Accepted by Advances in Neural Information Processing Systems 2024; reviews and comments at https://openreview.net/forum?id=m1PVjNHvtP
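To see why entropy regularization makes this projection solvable by plain gradient steps, here is a minimal NumPy sketch for the special case of equality constraints with box bounds, using unaccelerated dual ascent; the paper's layer handles general inequalities and uses an accelerated, numerically enhanced solver, so treat this only as an illustration of the duality idea:

```python
# Approximate min c.x + (1/alpha) * sum(x ln x + (1-x) ln(1-x))
# s.t. A x = b, 0 <= x <= 1, via gradient ascent on the smooth dual.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def entropy_project(c, A, b, alpha=50.0, lr=0.02, iters=500):
    lam = np.zeros(A.shape[0])
    for _ in range(iters):
        x = sigmoid(-alpha * (c + A.T @ lam))  # primal minimizer, closed form
        lam += lr * (A @ x - b)                # dual gradient = A x(lam) - b
    return sigmoid(-alpha * (c + A.T @ lam))

# Example: project scores onto the probability simplex (sum x = 1).
x = entropy_project(c=np.array([-1.0, 0.2, 0.5]),
                    A=np.ones((1, 3)), b=np.array([1.0]))
```

Because the primal minimizer has a sigmoid closed form, every step is differentiable and requires no matrix factorization, which is the property the abstract highlights.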
arXiv:2409.16627 (cs.IR)
Train Once, Deploy Anywhere: Matryoshka Representation Learning for Multimodal Recommendation
Authors: Yueqi Wang, Zhenrui Yue, Huimin Zeng, Dong Wang, Julian McAuley
Abstract: Despite recent advancements in language and vision modeling, integrating rich multimodal knowledge into recommender systems continues to pose significant challenges. This is primarily due to the need for efficient recommendation, which requires adaptive and interactive responses. In this study, we focus on sequential recommendation and introduce a lightweight framework called full-scale Matryoshka representation learning for multimodal recommendation (fMRLRec). Our fMRLRec captures item features at different granularities, learning informative representations for efficient recommendation across multiple dimensions. To integrate item features from diverse modalities, fMRLRec employs a simple mapping to project multimodal item features into an aligned feature space. Additionally, we design an efficient linear transformation that embeds smaller features into larger ones, substantially reducing memory requirements for large-scale training on recommendation data. Combined with improved state space modeling techniques, fMRLRec scales to different dimensions and requires only one-time training to produce multiple models tailored to various granularities. We demonstrate the effectiveness and efficiency of fMRLRec on multiple benchmark datasets, where it consistently achieves superior performance over state-of-the-art baseline methods. We make our code and data publicly available at https://github.com/yueqirex/fMRLRec.
Submitted 2 October, 2024; v1 submitted 25 September, 2024; originally announced September 2024.
Comments: Accepted to EMNLP 2024 Findings
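The "train once, slice anywhere" property comes from supervising one embedding at several nested prefix sizes. A generic Matryoshka-style training loss, sketched below under my own simplifications (dot-product scoring, binary targets; fMRLRec's actual objective and nested linear transformation differ in detail):

```python
# One embedding table is supervised at several nested prefix sizes, so a
# single trained model can later be sliced to any of those dimensions.
import torch
import torch.nn.functional as F

def matryoshka_loss(user_emb, item_emb, target, dims=(64, 128, 256, 512)):
    """user_emb, item_emb: (batch, 512); target: (batch,) in {0, 1}."""
    loss = 0.0
    for d in dims:
        # prefix-sliced dot product: the first d dims must work on their own
        score = (user_emb[:, :d] * item_emb[:, :d]).sum(-1)
        loss = loss + F.binary_cross_entropy_with_logits(score, target.float())
    return loss / len(dims)
```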
arXiv:2409.15109 (cs.IT, eess.SP)
End-User-Centric Collaborative MIMO: Performance Analysis and Proof of Concept
Authors: Chao-Kai Wen, Yen-Cheng Chan, Tzu-Hao Huang, Hao-Jun Zeng, Fu-Kang Wang, Lung-Sheng Tsai, Pei-Kai Liao
Abstract: The trend toward using increasingly large arrays of antenna elements continues. However, fitting more antennas into the limited space available on user equipment (UE) within the currently popular Frequency Range 1 spectrum presents a significant challenge. This limitation constrains the capacity-scaling gains for end users, even when networks support a higher number of antennas. To address this issue, we explore a user-centric collaborative MIMO approach, termed UE-CoMIMO, which leverages several fixed or portable devices within a personal area to form a virtually expanded antenna array. This paper develops a comprehensive mathematical framework to analyze the performance of UE-CoMIMO. Our analytical results demonstrate that UE-CoMIMO can significantly enhance the system's effective channel response within the current communication system without requiring extensive modifications. Further performance improvements can be achieved by optimizing the phase shifters on the expanded antenna arrays at the collaborative devices. These findings are corroborated by ray-tracing simulations. Beyond the simulations, we implemented these collaborative devices and successfully conducted over-the-air validation in a real 5G environment, showcasing the practical potential of UE-CoMIMO. Several practical perspectives are discussed, highlighting the feasibility and benefits of this approach in real-world scenarios.
Submitted 24 December, 2024; v1 submitted 23 September, 2024; originally announced September 2024.
Comments: 16 pages, 11 figures; this work has been submitted to IEEE for possible publication
arXiv:2409.12471 (cs.RO, cs.AI)
Arena 4.0: A Comprehensive ROS2 Development and Benchmarking Platform for Human-centric Navigation Using Generative-Model-based Environment Generation
Authors: Volodymyr Shcherbyna, Linh Kästner, Diego Diaz, Huu Giang Nguyen, Maximilian Ho-Kyoung Schreff, Tim Lenz, Jonas Kreutz, Ahmed Martban, Huajian Zeng, Harold Soh
Abstract: Building on the foundations of our previous work, this paper introduces Arena 4.0, a significant advancement over Arena 3.0, Arena-Bench, Arena 1.0, and Arena 2.0. Arena 4.0 offers three key novel contributions: (1) a generative-model-based world and scenario generation approach that utilizes large language models (LLMs) and diffusion models to dynamically generate complex, human-centric environments from text prompts or 2D floorplans, useful for the development and benchmarking of social navigation strategies; (2) a comprehensive 3D model database, extendable with additional 3D assets that are semantically linked and annotated for dynamic spawning and arrangement within 3D worlds; and (3) a complete migration to ROS 2, enabling compatibility with modern hardware and enhanced functionalities for improved navigation, usability, and easier deployment on real robots. We evaluated the platform's performance through a comprehensive user study, demonstrating significant improvements in usability and efficiency compared to previous versions. Arena 4.0 is openly available at https://github.com/Arena-Rosnav.
Submitted 19 September, 2024; originally announced September 2024.
Comments: 7 pages, 7 figures
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08926v1-abstract-full').style.display = 'none'; document.getElementById('2409.08926v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07417">arXiv:2409.07417</a> <span> [<a href="https://arxiv.org/pdf/2409.07417">pdf</a>, <a href="https://arxiv.org/format/2409.07417">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient One-Step Diffusion Refinement for Snapshot Compressive Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yunzhen Wang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+H">Haijin Zeng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shaoguang Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongyan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07417v1-abstract-short" style="display: inline;"> Coded Aperture Snapshot Spectral Imaging (CASSI) is a crucial technique for capturing three-dimensional multispectral images (MSIs) through the complex inverse task of reconstructing these images from coded two-dimensional measurements. Current state-of-the-art methods, predominantly end-to-end, face limitations in reconstructing high-frequency details and often rely on constrained datasets like K… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07417v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07417v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07417v1-abstract-full" style="display: none;"> Coded Aperture Snapshot Spectral Imaging (CASSI) is a crucial technique for capturing three-dimensional multispectral images (MSIs) through the complex inverse task of reconstructing these images from coded two-dimensional measurements. Current state-of-the-art methods, predominantly end-to-end, face limitations in reconstructing high-frequency details and often rely on constrained datasets like KAIST and CAVE, resulting in models with poor generalizability. In response to these challenges, this paper introduces a novel one-step Diffusion Probabilistic Model within a self-supervised adaptation framework for Snapshot Compressive Imaging (SCI). Our approach leverages a pretrained SCI reconstruction network to generate initial predictions from two-dimensional measurements. 
arXiv:2409.07417 (eess.IV, cs.CV)
Efficient One-Step Diffusion Refinement for Snapshot Compressive Imaging
Authors: Yunzhen Wang, Haijin Zeng, Shaoguang Huang, Hongyu Chen, Hongyan Zhang
Abstract: Coded Aperture Snapshot Spectral Imaging (CASSI) is a crucial technique for capturing three-dimensional multispectral images (MSIs) through the complex inverse task of reconstructing these images from coded two-dimensional measurements. Current state-of-the-art methods, predominantly end-to-end, face limitations in reconstructing high-frequency details and often rely on constrained datasets like KAIST and CAVE, resulting in models with poor generalizability. In response to these challenges, this paper introduces a novel one-step Diffusion Probabilistic Model within a self-supervised adaptation framework for Snapshot Compressive Imaging (SCI). Our approach leverages a pretrained SCI reconstruction network to generate initial predictions from two-dimensional measurements; a one-step diffusion model then produces high-frequency residuals to enhance these initial predictions. Additionally, acknowledging the high cost of collecting MSIs, we develop a self-supervised paradigm based on the Equivariant Imaging (EI) framework. Experimental results validate the superiority of our model compared to previous methods, showcasing its simplicity and adaptability to various end-to-end or unfolding techniques.
Submitted 11 September, 2024; originally announced September 2024.
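The inference pipeline the abstract describes reduces to two forward passes. A schematic sketch, where recon_net and residual_net are hypothetical stand-ins for the pretrained SCI reconstructor and the one-step diffusion refiner:

```python
# Sketch of the two-stage inference: coarse reconstruction, then a single
# diffusion step that predicts the high-frequency residual.
import torch

@torch.no_grad()
def refine(measurement, recon_net, residual_net, t=999):
    x0 = recon_net(measurement)                   # initial MSI prediction
    tt = torch.full((x0.shape[0],), t)            # single (late) timestep index
    residual = residual_net(x0, tt)               # one denoising step -> residual
    return x0 + residual                          # refined multispectral image
```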
arXiv:2409.01236 (cs.CV)
Spatial-Aware Conformal Prediction for Trustworthy Hyperspectral Image Classification
Authors: Kangdao Liu, Tianhao Sun, Hao Zeng, Yongshan Zhang, Chi-Man Pun, Chi-Man Vong
Abstract: Hyperspectral image (HSI) classification involves assigning unique labels to each pixel to identify various land cover categories. While deep classifiers have achieved high predictive accuracy in this field, they lack the ability to rigorously quantify confidence in their predictions. Quantifying the certainty of model predictions is crucial for the safe use of predictive models, and this limitation restricts their application in critical contexts where the cost of prediction errors is significant. To support the safe deployment of HSI classifiers, we first provide a theoretical proof establishing the validity of the emerging uncertainty quantification technique, conformal prediction, in the context of HSI classification. We then propose a conformal procedure that equips any trained HSI classifier with trustworthy prediction sets, ensuring that these sets include the true labels with a user-specified probability (e.g., 95%). Building on this foundation, we introduce Spatial-Aware Conformal Prediction (SACP), a conformal prediction framework specifically designed for HSI data. This method integrates essential spatial information inherent in HSIs by aggregating the non-conformity scores of pixels with high spatial correlation, which effectively enhances the efficiency of prediction sets. Both theoretical and empirical results validate the effectiveness of our proposed approach. The source code is available at https://github.com/J4ckLiu/SACP.
Submitted 29 October, 2024; v1 submitted 2 September, 2024; originally announced September 2024.
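A NumPy sketch of split conformal prediction with spatially aggregated scores, in the spirit of the abstract; the mean-over-neighborhood aggregation and the 1 - softmax score are my assumptions, not necessarily SACP's exact choices:

```python
# Split conformal prediction where each pixel's non-conformity scores are
# smoothed over its spatial neighborhood before calibration.
import numpy as np

def prediction_sets(probs, labels_cal, cal_idx, test_idx, neighbors, alpha=0.05):
    """probs: (n_pixels, n_classes) softmax outputs; labels_cal aligns with
    cal_idx; neighbors[i]: index array of pixels spatially correlated with i."""
    base = 1.0 - probs                                    # per-class score
    agg = np.stack([base[nb].mean(axis=0) for nb in neighbors])
    cal_scores = agg[cal_idx, labels_cal]                 # score of true label
    n = len(cal_idx)
    q_level = min(1.0, np.ceil((n + 1) * (1 - alpha)) / n)
    q = np.quantile(cal_scores, q_level)                  # conformal threshold
    # sets include the true label with probability >= 1 - alpha
    return [np.where(agg[i] <= q)[0] for i in test_idx]
```

Averaging scores over correlated neighbors reduces score variance, which is why the resulting prediction sets can be smaller (more "efficient") at the same coverage level.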
arXiv:2408.03886 (cs.IR)
Retrieval Augmentation via User Interest Clustering
Authors: Hanjia Lyu, Hanqing Zeng, Yinglong Xia, Ren Chen, Jiebo Luo
Abstract: Many existing industrial recommender systems are sensitive to the patterns of user-item engagement. Light users, who interact less frequently, correspond to a data-sparsity problem, making it difficult for the system to accurately learn and represent their preferences. Heavy users with rich interaction histories, on the other hand, often demonstrate a variety of niche interests that are hard to capture precisely under the standard "user-item" similarity measurement. Moreover, implementing these systems in an industrial environment requires that they be resource-efficient and scale to web-scale data under strict latency constraints. In this paper, we address these challenges by introducing an intermediate "interest" layer between users and items. We propose a novel approach that efficiently constructs user interests and facilitates low-cost inference by clustering engagement graphs and incorporating user-interest attention. This method enhances the understanding of light users' preferences by linking them with heavy users. By integrating user-interest attention, our approach allows a more personalized similarity metric, adept at capturing the complex dynamics of user-item interactions. The use of interest as an intermediary layer fosters a balance between scalability and expressiveness in the model. Evaluations on two public datasets reveal that our method not only achieves improved recommendation performance but also demonstrates enhanced computational efficiency compared to item-level attention models. Our approach has also been deployed in multiple products at Meta, facilitating short-form video recommendation.
Submitted 7 August, 2024; originally announced August 2024.
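A toy sketch of the "interest layer" idea: a user's engaged items are grouped into interest clusters, and a candidate item attends over those clusters instead of over every item. The mean-pooling and dot-product attention here are my assumptions, not the production design:

```python
# Score a candidate item against interest clusters rather than raw items,
# trading item-level attention cost for a handful of cluster vectors.
import torch
import torch.nn.functional as F

def interest_score(engaged_item_embs, cluster_ids, candidate_emb, n_clusters):
    # mean-pool the user's engaged items within each interest cluster
    interests = torch.stack([
        engaged_item_embs[cluster_ids == c].mean(0)
        for c in range(n_clusters) if (cluster_ids == c).any()
    ])
    attn = F.softmax(interests @ candidate_emb, dim=0)   # user-interest attention
    user_vec = (attn.unsqueeze(-1) * interests).sum(0)   # interest-weighted user
    return user_vec @ candidate_emb                      # personalized similarity
```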
arXiv:2408.00629 (cs.CV, eess.IV)
Cross-Scan Mamba with Masked Training for Robust Spectral Imaging
Authors: Wenzhe Tian, Haijin Zeng, Yin-Ping Zhao, Yongyong Chen, Zhen Wang, Xuelong Li
Abstract: Snapshot Compressive Imaging (SCI) enables fast spectral imaging but requires effective decoding algorithms for hyperspectral image (HSI) reconstruction from compressed measurements. Current CNN-based methods are limited in modeling long-range dependencies, while Transformer-based models face high computational complexity. Although recent Mamba models outperform CNNs and Transformers in RGB tasks in terms of computational efficiency or accuracy, they are not specifically optimized to fully leverage the local spatial and spectral correlations inherent in HSIs. To address this, we propose the Cross-Scanning Mamba, named CS-Mamba, which employs a spatial-spectral SSM for globally-locally balanced context encoding and promotes cross-channel interaction. Moreover, while current reconstruction algorithms perform increasingly well in simulation, they exhibit suboptimal performance on real data due to limited generalization capability: during training, a model may capture not the inherent features of the images but parameters that merely mitigate specific noise and loss, leading to a decline in reconstruction quality on real scenes. To overcome this challenge, we propose a masked training method to enhance the generalization ability of models. Experimental results show that our CS-Mamba achieves state-of-the-art performance, and the masked training method better reconstructs smooth features, improving visual quality.
Submitted 6 December, 2024; v1 submitted 1 August, 2024; originally announced August 2024.
Comments: 11 pages, 7 figures
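A hedged sketch of the masked-training idea: randomly occlude patch-aligned regions of the input during training so the model must rely on image structure rather than dataset-specific noise. The exact masking policy in the paper may differ:

```python
# Randomly zero out patch-aligned regions of a batch of measurements.
import torch

def random_patch_mask(x, patch=8, drop=0.3):
    """x: (B, C, H, W) with H, W divisible by patch; drop = masked fraction."""
    B, C, H, W = x.shape
    gh, gw = H // patch, W // patch
    keep = (torch.rand(B, 1, gh, gw, device=x.device) > drop).float()
    keep = keep.repeat_interleave(patch, 2).repeat_interleave(patch, 3)
    return x * keep

# training step: loss = criterion(model(random_patch_mask(meas)), target)
```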
arXiv:2407.20647 (cs.CV)
Image Re-Identification: Where Self-supervision Meets Vision-Language Learning
Authors: Bin Wang, Yuying Liang, Lei Cai, Huakun Huang, Huanqiang Zeng
Abstract: Recently, large-scale vision-language pre-trained models like CLIP have shown impressive performance in image re-identification (ReID). In this work, we explore whether self-supervision can aid the use of CLIP for image ReID tasks. Specifically, we propose SVLL-ReID, the first attempt to integrate self-supervision and pre-trained CLIP via two training stages to facilitate image ReID. We observe that: 1) incorporating language self-supervision in the first training stage makes the learnable text prompts more distinguishable, and 2) incorporating vision self-supervision in the second training stage makes the image features learned by the image encoder more discriminative. These two benefits jointly account for the performance gain of the proposed SVLL-ReID. Experiments on six image ReID benchmark datasets without any concrete text labels show that SVLL-ReID achieves the overall best performance compared with state-of-the-art methods. Codes will be publicly available at https://github.com/BinWangGzhu/SVLL-ReID.
Submitted 30 July, 2024; originally announced July 2024.
arXiv:2407.09786 (cs.CV)
Self-supervised 3D Point Cloud Completion via Multi-view Adversarial Learning
Authors: Lintai Wu, Xianjing Cheng, Yong Xu, Huanqiang Zeng, Junhui Hou
Abstract: In real-world scenarios, scanned point clouds are often incomplete due to occlusion. Self-supervised point cloud completion reconstructs the missing regions of these incomplete objects without the supervision of complete ground truth. Current self-supervised methods either rely on multiple views of partial observations for supervision or overlook the intrinsic geometric similarity that can be identified and utilized from the given partial point clouds. In this paper, we propose MAL-SPC, a framework that effectively leverages both object-level and category-specific geometric similarities to complete missing structures. MAL-SPC requires no 3D complete supervision and needs only a single partial point cloud per object. Specifically, we first introduce a Pattern Retrieval Network to retrieve similar position and curvature patterns between the partial input and the predicted shape, and then leverage these similarities to densify and refine the reconstructed results. Additionally, we render the reconstructed complete shape into multi-view depth maps and design an adversarial learning module to learn the geometry of the target shape from category-specific single-view depth images. To achieve anisotropic rendering, we design a density-aware radius estimation algorithm to improve the quality of the rendered images. MAL-SPC yields the best results compared to current state-of-the-art methods. We will make the source code publicly available at https://github.com/ltwu6/malspc.
Submitted 6 November, 2024; v1 submitted 13 July, 2024; originally announced July 2024.
Comments: 14 pages, 10 figures
arXiv:2407.01191 (cs.RO, cs.AI, cs.CV)
MARS: Multimodal Active Robotic Sensing for Articulated Characterization
Authors: Hongliang Zeng, Ping Zhang, Chengjiong Wu, Jiahua Wang, Tingyu Ye, Fang Li
Abstract: Precise perception of articulated objects is vital for empowering service robots. Recent studies mainly focus on point clouds, a single-modal approach that often neglects vital texture and lighting details and assumes ideal conditions such as optimal viewpoints, which is unrepresentative of real-world scenarios. To address these limitations, we introduce MARS, a novel framework for articulated object characterization. It features a multimodal fusion module that utilizes multi-scale RGB features to enhance point cloud features, coupled with reinforcement-learning-based active sensing for autonomous optimization of observation viewpoints. In experiments conducted on various articulated object instances from the PartNet-Mobility dataset, our method outperformed current state-of-the-art methods in joint parameter estimation accuracy. Additionally, through active sensing, MARS further reduces errors, demonstrating enhanced efficiency in handling suboptimal viewpoints. Furthermore, our method effectively generalizes to real-world articulated objects, enhancing robot interactions. Code is available at https://github.com/robhlzeng/MARS.
Submitted 1 July, 2024; originally announced July 2024.
arXiv:2406.17342 (https://arxiv.org/abs/2406.17342) [cs.CV, cs.AI]
Masked Generative Extractor for Synergistic Representation and 3D Generation of Point Clouds
Authors: Hongliang Zeng, Ping Zhang, Fang Li, Jiahua Wang, Tingyu Ye, Pengteng Guo
Abstract: Representation and generative learning, as reconstruction-based methods, have demonstrated their potential for mutual reinforcement across various domains. In the field of point cloud processing, although existing studies have adopted training strategies from generative models to enhance representational capabilities, these methods are limited by their inability to genuinely generate 3D shapes. To explore the benefits of deeply integrating 3D representation learning and generative learning, we propose an innovative framework called Point-MGE. Specifically, this framework first utilizes a vector-quantized variational autoencoder to reconstruct a neural field representation of 3D shapes, thereby learning discrete semantic features of point patches. Subsequently, we design a sliding masking ratio to smooth the transition from representation learning to generative learning. Moreover, our method demonstrates strong generalization capability in learning high-capacity models, achieving new state-of-the-art performance across multiple downstream tasks. In shape classification, Point-MGE achieved an accuracy of 94.2% (+1.0%) on the ModelNet40 dataset and 92.9% (+5.5%) on the ScanObjectNN dataset. Experimental results also confirmed that Point-MGE can generate high-quality 3D shapes in both unconditional and conditional settings.
Submitted 15 August, 2024; v1 submitted 25 June, 2024; originally announced June 2024.
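The sliding masking ratio is the ingredient that bridges the two training regimes. Here is a small sketch of one plausible schedule (the abstract does not give the exact rule, so the window shape and bounds below are assumptions): the sampling window for the mask ratio drifts upward during training, from MAE-style partial masking toward the near-total masking that generation requires.

```python
import numpy as np

def sliding_mask_ratio(step, total_steps, lo=0.4, hi=1.0, window=0.2, rng=None):
    """Assumed form of a sliding masking-ratio schedule: a sampling
    window of fixed width whose centre moves from `lo` toward `hi`."""
    rng = rng or np.random.default_rng()
    center = lo + (hi - lo) * step / total_steps  # window centre slides up
    low = max(lo, center - window / 2)
    high = min(hi, center + window / 2)
    return rng.uniform(low, high)

for s in (0, 5000, 10000):
    print(round(sliding_mask_ratio(s, 10000, rng=np.random.default_rng(0)), 3))
```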
arXiv:2406.12349 (https://arxiv.org/abs/2406.12349) [math.OC, cs.LG]
Effective Generation of Feasible Solutions for Integer Programming via Guided Diffusion
Authors: Hao Zeng, Jiaqi Wang, Avirup Das, Junying He, Kunpeng Han, Haoyuan Hu, Mingfei Sun
Abstract: Feasible solutions are crucial for Integer Programming (IP) since they can substantially speed up the solving process. In many applications, similar IP instances often exhibit similar structures and shared solution distributions, which can potentially be modeled by deep learning methods. Unfortunately, existing deep-learning-based algorithms, such as Neural Diving and the Predict-and-Search framework, are limited to generating only partial feasible solutions, and they must rely on solvers such as SCIP and Gurobi to complete the solutions for a given IP problem. In this paper, we propose a novel framework that generates complete feasible solutions end-to-end. Our framework leverages contrastive learning to characterize the relationship between IP instances and solutions, and learns latent embeddings for both IP instances and their solutions. Further, the framework employs diffusion models to learn the distribution of solution embeddings conditioned on IP representations, with a dedicated guided sampling strategy that accounts for both constraints and objectives. We empirically evaluate our framework on four typical datasets of IP problems and show that it effectively generates complete feasible solutions with high probability (>89.7%) without relying on solvers, and the quality of the solutions is comparable to the best heuristic solutions from Gurobi. Furthermore, by integrating our method's sampled partial solutions with the CompleteSol heuristic from SCIP, the resulting feasible solutions outperform those from state-of-the-art methods across all datasets, exhibiting a 3.7% to 33.7% improvement in the gap to optimal values while maintaining a feasible ratio of over 99.7% for all datasets.
Submitted 18 June, 2024; originally announced June 2024.
Comments: Accepted to SIGKDD 2024
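To make the guided sampling idea concrete, here is a toy guidance update on a continuous relaxation: each step nudges a candidate toward feasibility of Ax <= b and toward a lower objective c·x. In the paper the guidance acts on diffusion-sampled solution embeddings; acting on x directly, as below, is a deliberate simplification for illustration.

```python
import numpy as np

def guided_step(x, A, b, c, lam=0.5, eta=0.1):
    """One illustrative guidance update: gradient step on a constraint
    penalty 0.5*||max(Ax - b, 0)||^2 plus a pull toward lower c.x."""
    violation = np.maximum(A @ x - b, 0.0)  # constraint slack, clipped at 0
    grad = A.T @ violation + lam * c        # penalty gradient + objective pull
    return x - eta * grad

# Toy relaxation: minimise c.x subject to Ax <= b, rounding at the end.
A = np.array([[1.0, 2.0], [3.0, 1.0]]); b = np.array([4.0, 6.0])
c = np.array([-1.0, -1.0])
x = np.array([3.0, 3.0])
for _ in range(200):
    x = guided_step(x, A, b, c)
print(np.round(x))  # a near-feasible integer candidate
```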
arXiv:2406.09815 (https://arxiv.org/abs/2406.09815) [cs.CL, cs.AI]
Retrieval Augmented Fact Verification by Synthesizing Contrastive Arguments
Authors: Zhenrui Yue, Huimin Zeng, Lanyu Shang, Yifan Liu, Yang Zhang, Dong Wang
Abstract: The rapid propagation of misinformation poses substantial risks to public interest. To combat misinformation, large language models (LLMs) are adapted to automatically verify claim credibility. Nevertheless, existing methods rely heavily on the knowledge embedded within LLMs and/or black-box APIs for evidence collection, leading to subpar performance with smaller LLMs or with unreliable context. In this paper, we propose retrieval augmented fact verification through the synthesis of contrasting arguments (RAFTS). Given an input claim, RAFTS starts with evidence retrieval, where we design a retrieval pipeline to collect and re-rank relevant documents from verifiable sources. Then, RAFTS forms contrastive arguments (i.e., supporting or refuting) conditioned on the retrieved evidence. In addition, RAFTS leverages an embedding model to identify informative demonstrations, followed by in-context prompting to generate the prediction and explanation. Our method effectively retrieves relevant documents as evidence and evaluates arguments from varying perspectives, incorporating nuanced information for fine-grained decision-making. Combined with informative in-context examples as priors, RAFTS achieves significant improvements over supervised and LLM baselines without complex prompts. We demonstrate the effectiveness of our method through extensive experiments, where RAFTS can outperform GPT-based methods with a significantly smaller 7B LLM.
Submitted 14 June, 2024; originally announced June 2024.
Comments: Accepted to ACL 2024
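The RAFTS flow is easy to express as a pipeline skeleton. In this sketch `retrieve`, `rerank`, and `llm` are caller-supplied callables, and the prompts are paraphrases of the abstract's description rather than the authors' templates.

```python
def rafts_verify(claim, retrieve, rerank, llm):
    """Sketch of the retrieve -> contrast -> decide flow described in
    the abstract; all three callables are assumptions, not a published API."""
    docs = rerank(claim, retrieve(claim))[:5]  # evidence retrieval + re-rank
    evidence = "\n".join(docs)
    support = llm(f"Argue that the claim is TRUE given the evidence.\n"
                  f"Claim: {claim}\nEvidence: {evidence}")
    refute = llm(f"Argue that the claim is FALSE given the evidence.\n"
                 f"Claim: {claim}\nEvidence: {evidence}")
    return llm("Given both arguments, answer SUPPORTED or REFUTED.\n"
               f"Pro: {support}\nCon: {refute}")

if __name__ == "__main__":
    # Trivial stand-ins so the skeleton runs end to end.
    retrieve = lambda q: ["doc about " + q]
    rerank = lambda q, ds: ds
    llm = lambda prompt: "SUPPORTED"
    print(rafts_verify("water boils at 100C at sea level", retrieve, rerank, llm))
```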
arXiv:2406.09317 (https://arxiv.org/abs/2406.09317) [eess.IV, cs.CV]
Common and Rare Fundus Diseases Identification Using Vision-Language Foundation Model with Knowledge of Over 400 Diseases
Authors: Meng Wang, Tian Lin, Aidi Lin, Kai Yu, Yuanyuan Peng, Lianyu Wang, Cheng Chen, Ke Zou, Huiyu Liang, Man Chen, Xue Yao, Meiqin Zhang, Binwei Huang, Chaoxin Zheng, Peixin Zhang, Wei Chen, Yilong Luo, Yifan Chen, Honghe Xia, Tingkun Shi, Qi Zhang, Jinming Guo, Xiaolin Chen, Jingcheng Wang, Yih Chung Tham, et al. (24 additional authors not shown)
Abstract: Previous foundation models for retinal images were pre-trained with limited disease categories and knowledge bases. Here we introduce RetiZero, a vision-language foundation model that leverages knowledge from over 400 fundus diseases. For RetiZero's pre-training, we compiled 341,896 fundus images paired with text descriptions, sourced from public datasets, ophthalmic literature, and online resources, encompassing a diverse range of diseases across multiple ethnicities and countries. RetiZero exhibits superior performance in several downstream tasks, including zero-shot disease recognition, image-to-image retrieval, and internal- and cross-domain disease identification. In zero-shot scenarios, RetiZero achieves Top-5 accuracy scores of 0.8430 for 15 fundus diseases and 0.7561 for 52 fundus diseases. For image retrieval, it achieves Top-5 scores of 0.9500 and 0.8860 for the same disease sets, respectively. Clinical evaluations show that RetiZero's Top-3 zero-shot performance surpasses the average of 19 ophthalmologists from Singapore, China, and the United States. Furthermore, RetiZero significantly enhances clinicians' accuracy in diagnosing fundus diseases. These findings underscore the value of integrating the RetiZero foundation model into clinical settings, where a variety of fundus diseases are encountered.
Submitted 30 June, 2024; v1 submitted 13 June, 2024; originally announced June 2024.
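Zero-shot recognition with a vision-language model of this kind is typically CLIP-style ranking. Assuming RetiZero follows that pattern (an assumption, since the abstract does not spell out the inference rule), the Top-k scoring step looks like:

```python
import numpy as np

def topk_zero_shot(img_emb, text_embs, disease_names, k=5):
    """CLIP-style zero-shot ranking (assumed inference rule): order
    disease text embeddings by cosine similarity to the image embedding."""
    img = img_emb / np.linalg.norm(img_emb)
    txt = text_embs / np.linalg.norm(text_embs, axis=1, keepdims=True)
    scores = txt @ img                       # cosine similarity per disease
    order = np.argsort(scores)[::-1][:k]     # highest scores first
    return [(disease_names[i], float(scores[i])) for i in order]

rng = np.random.default_rng(0)
names = [f"disease_{i}" for i in range(52)]
print(topk_zero_shot(rng.normal(size=256), rng.normal(size=(52, 256)), names))
```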
arXiv:2406.01028 (https://arxiv.org/abs/2406.01028) [cs.CV]
LLEMamba: Low-Light Enhancement via Relighting-Guided Mamba with Deep Unfolding Network
Authors: Xuanqi Zhang, Haijin Zeng, Jinwang Pan, Qiangqiang Shen, Yongyong Chen
Abstract: Transformer-based low-light enhancement methods have yielded promising performance by effectively capturing long-range dependencies in a global context. However, their elevated computational demand limits the scalability of multiple iterations in deep unfolding networks, and hence they have difficulty flexibly balancing interpretability and distortion. To address this issue, we propose a novel Low-Light Enhancement method via relighting-guided Mamba with a deep unfolding network (LLEMamba), whose theoretical interpretability and fidelity are guaranteed by Retinex optimization and Mamba deep priors, respectively. Specifically, LLEMamba first constructs a Retinex model with deep priors, embedding the iterative optimization process based on the Alternating Direction Method of Multipliers (ADMM) within a deep unfolding network. Unlike Transformer-based designs, to support a deep unfolding framework with multiple iterations, LLEMamba introduces a novel Mamba architecture with lower computational complexity, which not only captures a light-dependent global visual context for dark images during reflectance relighting but also yields more stable closed-form solutions. Experiments on the benchmarks show that LLEMamba achieves superior quantitative evaluations and lower-distortion visual results compared to existing state-of-the-art methods.
Submitted 3 June, 2024; originally announced June 2024.
Comments: 9 pages, 7 figures
arXiv:2406.00837 (https://arxiv.org/abs/2406.00837) [cs.RO]
Arena 3.0: Advancing Social Navigation in Collaborative and Highly Dynamic Environments
Authors: Linh Kästner, Volodymyir Shcherbyna, Huajian Zeng, Tuan Anh Le, Maximilian Ho-Kyoung Schreff, Halid Osmaev, Nam Truong Tran, Diego Diaz, Jan Golebiowski, Harold Soh, Jens Lambrecht
Abstract: Building upon our previous contributions, this paper introduces Arena 3.0, an extension of Arena-Bench, Arena 1.0, and Arena 2.0. Arena 3.0 is a comprehensive software stack containing multiple modules and simulation environments focusing on the development, simulation, and benchmarking of social navigation approaches in collaborative environments. We significantly enhance the realism of human behavior simulation by incorporating a diverse array of new social force models and interaction patterns, encompassing both human-human and human-robot dynamics. The platform provides a comprehensive set of new task modes designed for extensive benchmarking and testing, and it can dynamically generate realistic, human-centric environments catering to a broad spectrum of social navigation scenarios. In addition, the platform's functionalities have been abstracted across three widely used simulators, each tailored for specific training and testing purposes. The platform's efficacy has been validated through an extensive benchmark and user evaluations by a global community of researchers and students, who noted substantial improvements over previous versions and expressed interest in using the platform for future research and development. Arena 3.0 is openly available at https://github.com/Arena-Rosnav.
Submitted 2 June, 2024; originally announced June 2024.
Comments: 11 pages, 6 figures
Journal ref: Robotics Science and Systems 2024, Delft, Netherlands

arXiv:2405.18955 (https://arxiv.org/abs/2405.18955) [cs.CV]
RGB-T Object Detection via Group Shuffled Multi-receptive Attention and Multi-modal Supervision
Authors: Jinzhong Wang, Xuetao Tian, Shun Dai, Tao Zhuo, Haorui Zeng, Hongjuan Liu, Jiaqi Liu, Xiuwei Zhang, Yanning Zhang
Abstract: Multispectral object detection, utilizing both visible (RGB) and thermal infrared (T) modalities, has garnered significant attention for its robust performance across diverse weather and lighting conditions. However, effectively exploiting the complementarity between RGB-T modalities while maintaining efficiency remains a critical challenge. In this paper, a very simple Group Shuffled Multi-receptive Attention (GSMA) module is proposed to extract and combine multi-scale RGB and thermal features. The extracted multi-modal features are then directly integrated with a multi-level path aggregation neck, which significantly improves fusion effectiveness and efficiency. Meanwhile, multi-modal object detection often adopts union annotations for both modalities. This kind of supervision is insufficient and unfair, since objects observed in one modality may not be visible in the other. To solve this issue, Multi-modal Supervision (MS) is proposed to sufficiently supervise RGB-T object detection. Comprehensive experiments on two challenging benchmarks, KAIST and DroneVehicle, demonstrate that the proposed model achieves state-of-the-art accuracy while maintaining competitive efficiency.
Submitted 29 May, 2024; originally announced May 2024.
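From the module's name one can reconstruct a plausible GSMA layout: fuse the two modalities, shuffle channels across groups, apply convolutions with different receptive fields, and gate the result. The block below is that guess, not the published architecture.

```python
import torch
import torch.nn as nn

def channel_shuffle(x, groups):
    # Standard ShuffleNet-style channel shuffle across groups.
    b, c, h, w = x.shape
    return (x.view(b, groups, c // groups, h, w)
             .transpose(1, 2).reshape(b, c, h, w))

class GSMA(nn.Module):
    """A guess at Group Shuffled Multi-receptive Attention: grouped
    convolutions at several kernel sizes after a channel shuffle,
    followed by a channel-attention gate (layout assumed)."""
    def __init__(self, c=64, groups=4):
        super().__init__()
        self.groups = groups
        self.branches = nn.ModuleList(
            nn.Conv2d(c, c, k, padding=k // 2, groups=groups)
            for k in (1, 3, 5))
        self.gate = nn.Sequential(nn.AdaptiveAvgPool2d(1),
                                  nn.Conv2d(c, c, 1), nn.Sigmoid())

    def forward(self, rgb, thermal):
        x = channel_shuffle(rgb + thermal, self.groups)  # fuse, then shuffle
        y = sum(b(x) for b in self.branches)             # multi-receptive
        return y * self.gate(y)                          # channel attention

x = torch.randn(1, 64, 32, 32)
print(GSMA()(x, x).shape)  # torch.Size([1, 64, 32, 32])
```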
arXiv:2405.16102 (https://arxiv.org/abs/2405.16102) [eess.IV, cs.CV]
Reliable Source Approximation: Source-Free Unsupervised Domain Adaptation for Vestibular Schwannoma MRI Segmentation
Authors: Hongye Zeng, Ke Zou, Zhihao Chen, Rui Zheng, Huazhu Fu
Abstract: Source-Free Unsupervised Domain Adaptation (SFUDA) has recently become a focus in medical image domain adaptation, as it only utilizes the source model and does not require annotated target data. However, current SFUDA approaches cannot tackle complex segmentation tasks across different MRI sequences, such as vestibular schwannoma segmentation. To address this problem, we propose Reliable Source Approximation (RSA), which can generate source-like and structure-preserving images from the target domain for updating model parameters and adapting to domain shifts. Specifically, RSA deploys a conditional diffusion model to generate multiple source-like images under the guidance of varying edges of one target image. An uncertainty estimation module is then introduced to predict and refine reliable pseudo labels of the generated images, and prediction consistency is used to select the most reliable generations. Subsequently, all reliable generated images and their pseudo labels are utilized to update the model. RSA is validated on vestibular schwannoma segmentation across multi-modality MRI. The experimental results demonstrate that RSA consistently improves domain adaptation performance over other state-of-the-art SFUDA methods. Code is available at https://github.com/zenghy96/Reliable-Source-Approximation.
Submitted 25 May, 2024; originally announced May 2024.
Comments: Early accepted by MICCAI 2024
arXiv:2405.15600 (https://arxiv.org/abs/2405.15600) [stat.ML, cs.LG, econ.EM, stat.ME]
Transfer Learning for Spatial Autoregressive Models with Application to U.S. Presidential Election Prediction
Authors: Hao Zeng, Wei Zhong, Xingbai Xu
Abstract: It is important to incorporate spatial geographic information into U.S. presidential election analysis, especially for swing states. State-level analysis also faces significant challenges of limited spatial data availability. To address the challenges of spatial dependence and small sample sizes in predicting U.S. presidential election results using spatially dependent data, we propose a novel transfer learning framework within the spatial autoregressive (SAR) model, called tranSAR. Classical SAR model estimation often loses accuracy with small target data samples. Our framework enhances estimation and prediction by leveraging information from similar source data. We introduce a two-stage algorithm, consisting of a transferring stage and a debiasing stage, to estimate parameters, and we establish theoretical convergence rates for the estimators. Additionally, if the informative source data are unknown, we propose a transferable source detection algorithm using spatial residual bootstrap to maintain spatial dependence, and we derive its detection consistency. Simulation studies show our algorithm substantially improves on the classical two-stage least-squares estimator. We demonstrate our method's effectiveness in predicting outcomes in U.S. presidential swing states, where it outperforms traditional methods. In addition, our tranSAR model predicts that the Democratic party will win the 2024 U.S. presidential election.
Submitted 7 September, 2024; v1 submitted 19 May, 2024; originally announced May 2024.
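The two-stage flavour of tranSAR carries over to a plain linear model, which makes for a compact illustration (the spatial-lag term that defines the SAR model is omitted, so this is an analogy only): stage one pools source and target data to estimate the coefficients, stage two corrects the bias on target data alone.

```python
import numpy as np

def transfer_then_debias(Xs, ys, Xt, yt, lam=1.0):
    """Two-stage transfer sketch on ordinary least squares (the SAR
    spatial lag is omitted): pooled fit, then a ridge-regularised
    correction estimated from target residuals."""
    X = np.vstack([Xs, Xt]); y = np.concatenate([ys, yt])
    beta = np.linalg.lstsq(X, y, rcond=None)[0]           # transferring stage
    resid = yt - Xt @ beta
    p = Xt.shape[1]
    delta = np.linalg.solve(Xt.T @ Xt + lam * np.eye(p), Xt.T @ resid)
    return beta + delta                                    # debiasing stage

rng = np.random.default_rng(0)
beta_true = np.array([1.0, -2.0])
Xs = rng.normal(size=(500, 2)); ys = Xs @ (beta_true + 0.1) + rng.normal(size=500)
Xt = rng.normal(size=(40, 2));  yt = Xt @ beta_true + rng.normal(size=40)
print(transfer_then_debias(Xs, ys, Xt, yt))  # close to beta_true
```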
arXiv:2405.13965 (https://arxiv.org/abs/2405.13965) [cs.LG]
Unleashing the Power of Unlabeled Data: A Self-supervised Learning Framework for Cyber Attack Detection in Smart Grids
Authors: Hanyu Zeng, Pengfei Zhou, Xin Lou, Zhen Wei Ng, David K. Y. Yau, Marianne Winslett
Abstract: Modern power grids are undergoing significant changes driven by information and communication technologies (ICTs), evolving into smart grids with higher efficiency and lower operation costs. Using ICTs, however, comes with an inevitable side effect: it makes the power system more vulnerable to cyber attacks. In this paper, we propose a self-supervised learning-based framework to detect and identify various types of cyber attacks. Unlike existing approaches, the proposed framework does not rely on large amounts of well-curated labeled data but makes use of the massive unlabeled data in the wild, which are easily accessible. Specifically, the proposed framework adopts the BERT model from the natural language processing domain and learns generalizable and effective representations from the unlabeled sensing data, which capture the distinctive patterns of different attacks. Using the learned representations, together with a very small amount of labeled data, we can train a task-specific classifier to detect various types of cyber attacks. Meanwhile, real-world training datasets are usually imbalanced, i.e., there are only a limited number of data samples containing attacks. To cope with such data imbalance, we propose a new loss function, separate mean error (SME), which pays equal attention to the large and small categories to better train the model. Experimental results in a 5-area power grid system with 37 buses demonstrate the superior performance of our framework over existing approaches, especially when a very limited portion of labeled data is available, e.g., as low as 0.002%. We believe such a framework can be easily adopted to detect a variety of cyber attacks in other power grid scenarios.
Submitted 22 May, 2024; originally announced May 2024.
Comments: 9 pages, 5 figures
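The abstract does not give the SME formula, so the following is one natural reading of "pays equal attention to the large and small categories": average the per-class mean error so that minority attack classes weigh as much as the majority class.

```python
import torch

def separate_mean_error(logits, labels):
    """Assumed form of a separate-mean-error loss: mean cross-entropy is
    computed per class present in the batch, then averaged across classes,
    so class frequency no longer dominates the gradient."""
    ce = torch.nn.functional.cross_entropy(logits, labels, reduction="none")
    per_class = [ce[labels == c].mean() for c in labels.unique()]
    return torch.stack(per_class).mean()

logits = torch.randn(32, 5, requires_grad=True)
labels = torch.randint(0, 5, (32,))
loss = separate_mean_error(logits, labels)
loss.backward()
print(float(loss))
```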
arXiv:2405.13179 (https://arxiv.org/abs/2405.13179) [cs.CL]
RAG-RLRC-LaySum at BioLaySumm: Integrating Retrieval-Augmented Generation and Readability Control for Layman Summarization of Biomedical Texts
Authors: Yuelyu Ji, Zhuochun Li, Rui Meng, Sonish Sivarajkumar, Yanshan Wang, Zeshui Yu, Hui Ji, Yushui Han, Hanyu Zeng, Daqing He
Abstract: This paper introduces the RAG-RLRC-LaySum framework, designed to make complex biomedical research understandable to laymen through advanced Natural Language Processing (NLP) techniques. Our Retrieval Augmented Generation (RAG) solution, enhanced by a reranking method, utilizes multiple knowledge sources to ensure the precision and pertinence of lay summaries. Additionally, our Reinforcement Learning for Readability Control (RLRC) strategy improves readability, making scientific content comprehensible to non-specialists. Evaluations using the publicly accessible PLOS and eLife datasets show that our methods surpass the plain Gemini model, demonstrating a 20% increase in readability scores, a 15% improvement in ROUGE-2 relevance scores, and a 10% enhancement in factual accuracy. The RAG-RLRC-LaySum framework effectively democratizes scientific knowledge, enhancing public engagement with biomedical discoveries.
Submitted 24 June, 2024; v1 submitted 21 May, 2024; originally announced May 2024.
class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>