Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,283 results for author: <span class="mathjax">Zheng, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Zheng%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zheng, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zheng%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zheng, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zheng%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13308">arXiv:2502.13308</a> <span> [<a href="https://arxiv.org/pdf/2502.13308">pdf</a>, <a href="https://arxiv.org/format/2502.13308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Label-Free Heterophily-Guided Approach for Unsupervised Graph Fraud Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+J">Junjun Pan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xin Zheng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yizhen Zheng</a>, <a href="/search/cs?searchtype=author&query=Liew%2C+A+W">Alan Wee-Chung Liew</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Fuyi Li</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+S">Shirui Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13308v1-abstract-short" style="display: inline;"> Graph fraud detection (GFD) has rapidly advanced in protecting online services by identifying malicious fraudsters. Recent supervised GFD research highlights that heterophilic connections between fraudsters and users can greatly impact detection performance, since fraudsters tend to camouflage themselves by building more connections to benign users. 
Despite the promising performance of supervised GFD methods, the reliance on labels limits their applications to unsupervised scenarios. Additionally, accurately capturing complex and diverse heterophily patterns without labels poses a further challenge. To fill the gap, we propose a Heterophily-guided Unsupervised Graph fraud dEtection approach (HUGE) for unsupervised GFD, which contains two essential components: a heterophily estimation module and an alignment-based fraud detection module. In the heterophily estimation module, we design a novel label-free heterophily metric called HALO, which captures the critical graph properties for GFD, enabling its outstanding ability to estimate heterophily from node attributes. In the alignment-based fraud detection module, we develop a joint MLP-GNN architecture with ranking loss and asymmetric alignment loss. The ranking loss aligns the predicted fraud score with the relative order of HALO, providing an extra robustness guarantee by comparing heterophily among non-adjacent nodes. Moreover, the asymmetric alignment loss effectively utilizes structural information while alleviating the feature-smooth effects of GNNs. Extensive experiments on 6 datasets demonstrate that HUGE significantly outperforms competitors, showcasing its effectiveness and robustness. The source code of HUGE is at https://github.com/CampanulaBells/HUGE-GAD.
Submitted 18 February, 2025; originally announced February 2025.
Comments: 9 pages, 3 figures. Accepted by AAAI 2025.
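As an aside, the ranking-loss idea described in this abstract (aligning a predicted fraud score with the relative order of a heterophily estimate) can be illustrated with a minimal, hypothetical sketch. This is not the HUGE implementation; the margin value, the score arrays, and the sampled node pairs below are illustrative assumptions.

```python
import numpy as np

def pairwise_ranking_loss(scores, halo, pairs, margin=0.5):
    """Hinge-style ranking loss: for each sampled node pair (i, j), penalize the
    model unless the node with the larger heterophily estimate also gets the
    larger fraud score by at least `margin`. Illustrative only, not the paper's code."""
    loss = 0.0
    for i, j in pairs:
        sign = np.sign(halo[i] - halo[j])            # desired ordering from the heterophily metric
        loss += max(0.0, margin - sign * (scores[i] - scores[j]))
    return loss / len(pairs)

# Toy usage: 4 nodes with random fraud scores and heterophily estimates,
# and a few (possibly non-adjacent) node pairs sampled for comparison.
rng = np.random.default_rng(0)
scores = rng.random(4)   # predicted fraud scores
halo = rng.random(4)     # label-free heterophily estimates
print(pairwise_ranking_loss(scores, halo, pairs=[(0, 1), (2, 3), (1, 3)]))
```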
arXiv:2502.13081 (https://arxiv.org/abs/2502.13081) [pdf, other] cs.CV
Personalized Image Generation with Deep Generative Models: A Decade Survey
Authors: Yuxiang Wei, Yiheng Zheng, Yabo Zhang, Ming Liu, Zhilong Ji, Lei Zhang, Wangmeng Zuo
Abstract: Recent advancements in generative models have significantly facilitated the development of personalized content creation. Given a small set of images with a user-specific concept, personalized image generation allows creating images that incorporate the specified concept and adhere to provided text descriptions. Due to its wide applications in content creation, significant effort has been devoted to this field in recent years. Nonetheless, the technologies used for personalization have evolved alongside the development of generative models, with their distinct and interrelated components. In this survey, we present a comprehensive review of generalized personalized image generation across various generative models, including traditional GANs, contemporary text-to-image diffusion models, and emerging multi-model autoregressive models. We first define a unified framework that standardizes the personalization process across different generative models, encompassing three key components, i.e., inversion spaces, inversion methods, and personalization schemes. This unified framework offers a structured approach to dissecting and comparing personalization techniques across different generative architectures.
Building upon this unified framework, we further provide an in-depth analysis of personalization techniques within each generative model, highlighting their unique contributions and innovations. Through comparative analysis, this survey elucidates the current landscape of personalized image generation, identifying commonalities and distinguishing features among existing methods. Finally, we discuss the open challenges in the field and propose potential directions for future research. We keep tracing related works at https://github.com/csyxwei/Awesome-Personalized-Image-Generation.
Submitted 18 February, 2025; originally announced February 2025.
Comments: 39 pages; under submission; more information: https://github.com/csyxwei/Awesome-Personalized-Image-Generation

arXiv:2502.11724 (https://arxiv.org/abs/2502.11724) [pdf, other] cs.CV
Incomplete Modality Disentangled Representation for Ophthalmic Disease Grading and Diagnosis
Authors: Chengzhi Liu, Zile Huang, Zhe Chen, Feilong Tang, Yu Tian, Zhongxing Xu, Zihong Luo, Yalin Zheng, Yanda Meng
Abstract: Ophthalmologists typically require multimodal data sources to improve diagnostic accuracy in clinical decisions. However, due to medical device shortages, low-quality data and data privacy concerns, missing data modalities are common in real-world scenarios.
Existing deep learning methods tend to address this by learning an implicit latent subspace representation for different modality combinations. We identify two significant limitations of these methods: (1) implicit representation constraints that hinder the model's ability to capture modality-specific information and (2) modality heterogeneity, causing distribution gaps and redundancy in feature representations. To address these, we propose an Incomplete Modality Disentangled Representation (IMDR) strategy, which disentangles features into explicit independent modal-common and modal-specific features under the guidance of mutual information, distilling informative knowledge and enabling the model to reconstruct valuable missing semantics and produce robust multimodal representations. Furthermore, we introduce a joint proxy learning module that assists IMDR in eliminating intra-modality redundancy by exploiting the extracted proxies from each class. Experiments on four ophthalmology multimodal datasets demonstrate that the proposed IMDR outperforms the state-of-the-art methods significantly.
Submitted 17 February, 2025; originally announced February 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 Pages, 6 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> AAAI2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11519">arXiv:2502.11519</a> <span> [<a href="https://arxiv.org/pdf/2502.11519">pdf</a>, <a href="https://arxiv.org/format/2502.11519">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3696410.3714636">10.1145/3696410.3714636 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> UniGO: A Unified Graph Neural Network for Modeling Opinion Dynamics on Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Hao Jiang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yuke Zheng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+W">Wenying Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11519v1-abstract-short" style="display: inline;"> Polarization and fragmentation in social media amplify user biases, making it increasingly important to understand the evolution of opinions. Opinion dynamics provide interpretability for studying opinion evolution, yet incorporating these insights into predictive models remains challenging. This challenge arises due to the inherent complexity of the diversity of opinion fusion rules and the diffi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11519v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11519v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11519v1-abstract-full" style="display: none;"> Polarization and fragmentation in social media amplify user biases, making it increasingly important to understand the evolution of opinions. Opinion dynamics provide interpretability for studying opinion evolution, yet incorporating these insights into predictive models remains challenging. This challenge arises due to the inherent complexity of the diversity of opinion fusion rules and the difficulty in capturing equilibrium states while avoiding over-smoothing. This paper constructs a unified opinion dynamics model to integrate different opinion fusion rules and generates corresponding synthetic datasets. To fully leverage the advantages of unified opinion dynamics, we introduces UniGO, a framework for modeling opinion evolution on graphs. 
Using a coarsen-refine mechanism, UniGO efficiently models opinion dynamics through a graph neural network, mitigating over-smoothing while preserving equilibrium phenomena. UniGO leverages pretraining on synthetic datasets, which enhances its ability to generalize to real-world scenarios, providing a viable paradigm for applications of opinion dynamics. Experimental results on both synthetic and real-world datasets demonstrate UniGO's effectiveness in capturing complex opinion formation processes and predicting future evolution. The pretrained model also shows strong generalization capability, validating the benefits of using synthetic data to boost real-world performance.
Submitted 17 February, 2025; originally announced February 2025.
Comments: WWW2025

arXiv:2502.11131 (https://arxiv.org/abs/2502.11131) [pdf, other] cs.CL
Improving Similar Case Retrieval Ranking Performance By Revisiting RankSVM
Authors: Yuqi Liu, Yan Zheng
Abstract: Given the rapid development of Legal AI, a lot of attention has been paid to one of the most important legal AI tasks--similar case retrieval, especially with language models to use. In our paper, however, we try to improve the ranking performance of current models from the perspective of learning to rank instead of language models.
Specifically, we conduct experiments using a pairwise method, RankSVM, as the classifier to substitute a fully connected layer, combined with commonly used language models, on the similar case retrieval datasets LeCaRDv1 and LeCaRDv2. We finally come to the conclusion that RankSVM could generally help improve the retrieval performance on the LeCaRDv1 and LeCaRDv2 datasets compared with original classifiers by optimizing the precise ranking. It could also help mitigate overfitting owing to class imbalance. Our code is available at https://github.com/liuyuqi123study/RankSVM_for_SLR.
Submitted 16 February, 2025; originally announced February 2025.
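For illustration of the pairwise idea behind RankSVM (not the code released by the authors), ranking can be reduced to binary classification on feature-vector differences: a relevant-minus-irrelevant difference is a positive example and the reverse is negative. A minimal sketch with scikit-learn, using made-up candidate features and relevance labels:

```python
import numpy as np
from sklearn.svm import LinearSVC

# Hypothetical features: each row scores one candidate case for the same query
# (e.g., outputs of a language-model encoder). Higher relevance = should rank higher.
candidates = np.array([[0.9, 0.2], [0.4, 0.7], [0.1, 0.1]])
relevance = np.array([2, 1, 0])

# Build pairwise training data: x_i - x_j labeled +1 if i should rank above j, else -1.
X_pairs, y_pairs = [], []
for i in range(len(candidates)):
    for j in range(len(candidates)):
        if relevance[i] != relevance[j]:
            X_pairs.append(candidates[i] - candidates[j])
            y_pairs.append(1 if relevance[i] > relevance[j] else -1)

ranker = LinearSVC(C=1.0).fit(np.array(X_pairs), np.array(y_pairs))

# Scoring new candidates: project onto the learned weight vector and sort.
scores = candidates @ ranker.coef_.ravel()
print(np.argsort(-scores))  # indices from most to least relevant
```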
arXiv:2502.11121 (https://arxiv.org/abs/2502.11121) [pdf, other] cs.CR
Reversible Data Hiding over Encrypted Images via Intrinsic Correlation in Block-Based Secret Sharing
Authors: Jianhui Zou, Weijia Cao, Shuang Yi, Yifeng Zheng, Zhongyun Hua
Abstract: With the rapid advancements in information technology, reversible data hiding over encrypted images (RDH-EI) has become essential for secure image management in cloud services. However, existing RDH-EI schemes often suffer from high computational complexity, low embedding rates, and excessive data expansion. This paper addresses these challenges by first analyzing the block-based secret sharing in existing schemes, revealing significant data redundancy within image blocks. Based on this observation, we propose two space-preserving methods: the direct space-vacating method and the image-shrinking-based space-vacating method. Using these techniques, we design two novel RDH-EI schemes: a high-capacity RDH-EI scheme and a size-reduced RDH-EI scheme. The high-capacity RDH-EI scheme directly creates embedding space in encrypted images, eliminating the need for complex space-vacating operations and achieving higher and more stable embedding rates. In contrast, the size-reduced RDH-EI scheme minimizes data expansion by discarding unnecessary shares, resulting in smaller encrypted images. Experimental results show that the high-capacity RDH-EI scheme outperforms existing methods in terms of embedding capacity, while the size-reduced RDH-EI scheme excels in minimizing data expansion. Both schemes provide effective solutions to the challenges in RDH-EI, offering promising applications in fields such as medical imaging and cloud storage.
Submitted 16 February, 2025; originally announced February 2025.

arXiv:2502.10696 (https://arxiv.org/abs/2502.10696) [pdf, other] cs.SE
Improving Retrieval-Augmented Deep Assertion Generation via Joint Training
Authors: Quanjun Zhang, Chunrong Fang, Yi Zheng, Ruixiang Qian, Shengcheng Yu, Yuan Zhao, Jianyi Zhou, Yun Yang, Tao Zheng, Zhenyu Chen
Abstract: Unit testing attempts to validate the correctness of basic units of the software system under test and has a crucial role in software development and testing. Very recent work proposes a retrieve-and-edit approach to generate unit test oracles, i.e., assertions.
Despite being promising, it is still far from perfect due to some limitations, such as splitting assertion retrieval and generation into two separate components without benefiting each other. In this paper, we propose AG-RAG, a retrieval-augmented automated assertion generation approach that leverages external codebases and joint training to address various technical limitations of prior work. Inspired by the plastic surgery hypothesis, AG-RAG attempts to combine relevant unit tests and advanced pre-trained language models (PLMs) with retrieval-augmented fine-tuning. AG-RAG builds a dense retriever to search for relevant test-assert pairs (TAPs) with semantic matching and a retrieval-augmented generator to synthesize accurate assertions with the focal-test and retrieved TAPs as input. Besides, AG-RAG leverages a code-aware language model CodeT5 as the cornerstone to facilitate both assertion retrieval and generation tasks. Furthermore, the retriever is optimized in conjunction with the generator as a whole pipeline with a joint training strategy. This unified design fully adapts both components specifically for retrieving more useful TAPs, thereby generating accurate assertions. We extensively evaluate AG-RAG against six state-of-the-art AG approaches on two benchmarks and three metrics. Experimental results show that AG-RAG significantly outperforms previous AG approaches on all benchmarks and metrics, e.g., improving the most recent baseline EditAS by 20.82% and 26.98% in terms of accuracy. AG-RAG also correctly generates 1739 and 2866 unique assertions that all baselines fail to generate, 3.45X and 9.20X more than EditAS.
Submitted 15 February, 2025; originally announced February 2025.
Comments: Accepted to IEEE Transactions on Software Engineering (TSE 2025)
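To make the retrieve step concrete (this is only a stand-in, not AG-RAG's learned dense retriever, and the test strings below are invented), a toy retrieval over test-assert pairs can be sketched with TF-IDF similarity:

```python
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical corpus of test-assert pairs (TAPs); AG-RAG uses a learned dense
# encoder, while TF-IDF here merely illustrates ranking TAPs against a focal test.
taps = [
    "testAdd: assertEquals(4, calc.add(2, 2))",
    "testDivideByZero: assertThrows(ArithmeticException.class, () -> calc.div(1, 0))",
    "testListEmpty: assertTrue(list.isEmpty())",
]
focal_test = "testSubtract: calc.sub(5, 3)"

vectorizer = TfidfVectorizer().fit(taps + [focal_test])
scores = cosine_similarity(vectorizer.transform([focal_test]),
                           vectorizer.transform(taps))[0]
print(taps[scores.argmax()])  # the retrieved TAP that would be passed to the generator
```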
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IEEE Transactions on Software Engineering (TSE 2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10610">arXiv:2502.10610</a> <span> [<a href="https://arxiv.org/pdf/2502.10610">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Reachability-Aware Reinforcement Learning for Collision Avoidance in Human-Machine Shared Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shiyue Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Masoud%2C+N">Neda Masoud</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yinan Zheng</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+X">Xiaohui Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10610v1-abstract-short" style="display: inline;"> Human-machine shared control in critical collision scenarios aims to aid drivers' accident avoidance through intervening only when necessary. Existing methods count on replanning collision-free trajectories and imposing human-machine tracking, which usually interrupts the driver's intent and increases the risk of conflict. Additionally, the lack of guaranteed trajectory feasibility under extreme c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10610v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10610v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10610v1-abstract-full" style="display: none;"> Human-machine shared control in critical collision scenarios aims to aid drivers' accident avoidance through intervening only when necessary. Existing methods count on replanning collision-free trajectories and imposing human-machine tracking, which usually interrupts the driver's intent and increases the risk of conflict. Additionally, the lack of guaranteed trajectory feasibility under extreme conditions can compromise safety and reliability. This paper introduces a Reachability-Aware Reinforcement Learning framework for shared control, guided by Hamilton-Jacobi (HJ) reachability analysis. Machine intervention is activated only when the vehicle approaches the Collision Avoidance Reachable Set (CARS), which represents states where collision is unavoidable. First, we precompute the reachability distributions and the CARS by solving the Bellman equation using offline data. To reduce human-machine conflicts, we develop a driver model for sudden obstacles and propose an authority allocation strategy considering key collision avoidance features. Finally, we train a reinforcement learning agent to reduce human-machine conflicts while enforcing the hard constraint of avoiding entry into the CARS. 
The proposed method was tested on a real vehicle platform. Results show that the controller intervenes effectively near CARS to prevent collisions while maintaining improved original driving task performance. Robustness analysis further supports its flexibility across different driver attributes.
Submitted 14 February, 2025; originally announced February 2025.
Comments: 12 pages, 13 figures, submitted to Advanced Engineering Informatics (ADVEI)

arXiv:2502.09967 (https://arxiv.org/abs/2502.09967) [pdf, other] cs.CV
VicKAM: Visual Conceptual Knowledge Guided Action Map for Weakly Supervised Group Activity Recognition
Authors: Zhuming Wang, Yihao Zheng, Jiarui Li, Yaofei Wu, Yan Huang, Zun Li, Lifang Wu, Liang Wang
Abstract: Existing weakly supervised group activity recognition methods rely on object detectors or attention mechanisms to capture key areas automatically. However, they overlook the semantic information associated with captured areas, which may adversely affect the recognition performance.
In this paper, we propose a novel framework named Visual Conceptual Knowledge Guided Action Map (VicKAM), which effectively captures the locations of individual actions and integrates them with action semantics for weakly supervised group activity recognition. It generates individual action prototypes from the training set as visual conceptual knowledge to bridge action semantics and visual representations. Guided by this knowledge, VicKAM produces action maps that indicate the likelihood of each action occurring at various locations, based on the image correlation theorem. It further augments individual action maps using group-activity-related statistical information, representing individual action distribution under different group activities, to establish connections between action maps and specific group activities. The augmented action map is incorporated with action semantic representations for group activity recognition. Extensive experiments on two public benchmarks, the Volleyball and the NBA datasets, demonstrate the effectiveness of our proposed method, even in cases of limited training data. The code will be released later.
Submitted 14 February, 2025; originally announced February 2025.

arXiv:2502.09808 (https://arxiv.org/abs/2502.09808) [pdf, other] cs.CR
VIRGOS: Secure Graph Convolutional Network on Vertically Split Data from Sparse Matrix Decomposition
Authors: Yu Zheng, Qizhi Zhang, Lichun Li, Kai Zhou, Shan Yin
Abstract: Securely computing graph convolutional networks (GCNs) is critical for applying their analytical capabilities to privacy-sensitive data like social/credit networks. Multiplying a sparse yet large adjacency matrix of a graph in GCN--a core operation in training/inference--poses a performance bottleneck in secure GCNs.
Consider a GCN with $|V|$ nodes and $|E|$ edges; it incurs a large $O(|V|^2)$ communication overhead. Modeling bipartite graphs and leveraging the monotonicity of non-zero entry locations, we propose a co-design harmonizing secure multi-party computation (MPC) with matrix sparsity. Our sparse matrix decomposition transforms an arbitrary sparse matrix into a product of structured matrices. Specialized MPC protocols for oblivious permutation and selection multiplication are then tailored, enabling our secure sparse matrix multiplication ($(SM)^2$) protocol, optimized for secure multiplication of these structured matrices. Together, these techniques take $O(|E|)$ communication in constant rounds. Supported by $(SM)^2$, we present Virgos, a secure 2-party framework that is communication-efficient and memory-friendly on standard vertically-partitioned graph datasets. Performance of Virgos has been empirically validated across diverse network conditions.
Submitted 13 February, 2025; originally announced February 2025.
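For intuition only (this says nothing about the MPC protocols the paper contributes), the plaintext analogue of the sparsity argument is straightforward: propagating features over a dense adjacency matrix touches $O(|V|^2)$ entries, while a sparse representation touches only the $O(|E|)$ stored non-zeros. A small sketch with SciPy on a made-up random graph:

```python
import numpy as np
from scipy.sparse import random as sparse_random

V, E_target = 1000, 5000
# Hypothetical sparse adjacency matrix with roughly E_target non-zero entries.
A = sparse_random(V, V, density=E_target / (V * V), format="csr", random_state=0)
X = np.random.default_rng(0).random((V, 16))   # node feature matrix

dense_entries = V * V          # cost proxy if A were stored and multiplied densely
sparse_entries = A.nnz         # a sparse product only visits the stored edges
print(dense_entries, sparse_entries)  # ~1,000,000 vs ~5,000

AX = A @ X   # sparse-dense multiply: one GCN-style propagation step
```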
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08391">arXiv:2502.08391</a> <span> [<a href="https://arxiv.org/pdf/2502.08391">pdf</a>, <a href="https://arxiv.org/format/2502.08391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jiangbo Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chen Li</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+T">Tieliang Gong</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+H">Huazhu Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08391v1-abstract-short" style="display: inline;"> Multiple instance learning (MIL)-based framework has become the mainstream for processing the whole slide image (WSI) with giga-pixel size and hierarchical image context in digital pathology. However, these methods heavily depend on a substantial number of bag-level labels and solely learn from the original slides, which are easily affected by variations in data distribution. Recently, vision lang… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08391v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08391v1-abstract-full" style="display: none;"> Multiple instance learning (MIL)-based framework has become the mainstream for processing the whole slide image (WSI) with giga-pixel size and hierarchical image context in digital pathology. However, these methods heavily depend on a substantial number of bag-level labels and solely learn from the original slides, which are easily affected by variations in data distribution. Recently, vision language model (VLM)-based methods introduced the language prior by pre-training on large-scale pathological image-text pairs. However, the previous text prompt lacks the consideration of pathological prior knowledge, therefore does not substantially boost the model's performance. Moreover, the collection of such pairs and the pre-training process are very time-consuming and source-intensive.To solve the above problems, we propose a dual-scale vision-language multiple instance learning (ViLa-MIL) framework for whole slide image classification. Specifically, we propose a dual-scale visual descriptive text prompt based on the frozen large language model (LLM) to boost the performance of VLM effectively. To transfer the VLM to process WSI efficiently, for the image branch, we propose a prototype-guided patch decoder to aggregate the patch features progressively by grouping similar patches into the same prototype; for the text branch, we introduce a context-guided text decoder to enhance the text features by incorporating the multi-granular image contexts. 
Extensive studies on three multi-cancer and multi-center subtyping datasets demonstrate the superiority of ViLa-MIL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08391v1-abstract-full').style.display = 'none'; document.getElementById('2502.08391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2024 (Updated version with corrections for typos and errors.)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07309">arXiv:2502.07309</a> <span> [<a href="https://arxiv.org/pdf/2502.07309">pdf</a>, <a href="https://arxiv.org/format/2502.07309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Semi-Supervised Vision-Centric 3D Occupancy World Model for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengfei Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yupeng Zheng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wei Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yilun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07309v1-abstract-short" style="display: inline;"> Understanding world dynamics is crucial for planning in autonomous driving. Recent methods attempt to achieve this by learning a 3D occupancy world model that forecasts future surrounding scenes based on current observation. However, 3D occupancy labels are still required to produce promising results. Considering the high annotation cost for 3D outdoor scenes, we propose a semi-supervised vision-c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07309v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07309v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07309v1-abstract-full" style="display: none;"> Understanding world dynamics is crucial for planning in autonomous driving. Recent methods attempt to achieve this by learning a 3D occupancy world model that forecasts future surrounding scenes based on current observation. However, 3D occupancy labels are still required to produce promising results. Considering the high annotation cost for 3D outdoor scenes, we propose a semi-supervised vision-centric 3D occupancy world model, PreWorld, to leverage the potential of 2D labels through a novel two-stage training paradigm: the self-supervised pre-training stage and the fully-supervised fine-tuning stage. 
Specifically, during the pre-training stage, we utilize an attribute projection head to generate different attribute fields of a scene (e.g., RGB, density, semantic), thus enabling temporal supervision from 2D labels via volume rendering techniques. Furthermore, we introduce a simple yet effective state-conditioned forecasting module to recursively forecast future occupancy and ego trajectory in a direct manner. Extensive experiments on the nuScenes dataset validate the effectiveness and scalability of our method, and demonstrate that PreWorld achieves competitive performance across 3D occupancy prediction, 4D occupancy forecasting and motion planning tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07309v1-abstract-full').style.display = 'none'; document.getElementById('2502.07309v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05932">arXiv:2502.05932</a> <span> [<a href="https://arxiv.org/pdf/2502.05932">pdf</a>, <a href="https://arxiv.org/format/2502.05932">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Skill Expansion and Composition in Parameter Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tenglong Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yinan Zheng</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+H">Haoyi Niu</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+Y">Yixing Lan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xin Xu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+X">Xianyuan Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05932v1-abstract-short" style="display: inline;"> Humans excel at reusing prior knowledge to address new challenges and developing skills while solving problems. This paradigm becomes increasingly popular in the development of autonomous agents, as it develops systems that can self-evolve in response to new challenges like human beings. 
However, previous methods suffer from limited training efficiency when expanding new skills and fail to fully l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05932v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05932v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05932v1-abstract-full" style="display: none;"> Humans excel at reusing prior knowledge to address new challenges and developing skills while solving problems. This paradigm becomes increasingly popular in the development of autonomous agents, as it develops systems that can self-evolve in response to new challenges like human beings. However, previous methods suffer from limited training efficiency when expanding new skills and fail to fully leverage prior knowledge to facilitate new task learning. In this paper, we propose Parametric Skill Expansion and Composition (PSEC), a new framework designed to iteratively evolve the agents' capabilities and efficiently address new challenges by maintaining a manageable skill library. This library can progressively integrate skill primitives as plug-and-play Low-Rank Adaptation (LoRA) modules in parameter-efficient finetuning, facilitating efficient and flexible skill expansion. This structure also enables the direct skill compositions in parameter space by merging LoRA modules that encode different skills, leveraging shared information across skills to effectively program new skills. Based on this, we propose a context-aware module to dynamically activate different skills to collaboratively handle new tasks. Empowering diverse applications including multi-objective composition, dynamics shift, and continual policy shift, the results on D4RL, DSRL benchmarks, and the DeepMind Control Suite show that PSEC exhibits superior capacity to leverage prior knowledge to efficiently tackle new challenges, as well as expand its skill libraries to evolve the capabilities. Project website: https://ltlhuuu.github.io/PSEC/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05932v1-abstract-full').style.display = 'none'; document.getElementById('2502.05932v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
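<p class="is-size-7">The "direct skill composition in parameter space" described above boils down to merging the low-rank updates of several LoRA modules into one effective weight delta. Below is a hedged sketch under the standard LoRA parameterization ($\Delta W = BA$); the fixed merge weights stand in for PSEC's learned, context-aware weighting and are not the paper's module.</p>
<pre>
# Hedged sketch: compose skills stored as LoRA modules by merging their
# low-rank deltas in parameter space (standard LoRA: delta_W = B @ A).
import numpy as np

def compose_loras(base_weight, lora_modules, merge_weights):
    """base_weight: (d_out, d_in); lora_modules: list of (B, A) pairs with
    B: (d_out, r) and A: (r, d_in); merge_weights: one scalar per skill."""
    delta = np.zeros_like(base_weight)
    for (b, a), w in zip(lora_modules, merge_weights):
        delta += w * (b @ a)
    return base_weight + delta

d_out, d_in, rank = 64, 32, 4
base = np.random.randn(d_out, d_in)
skills = [(np.random.randn(d_out, rank), np.random.randn(rank, d_in)) for _ in range(3)]
merged = compose_loras(base, skills, merge_weights=[0.5, 0.3, 0.2])
print(merged.shape)   # same shape as the frozen base weight
</pre>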
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025, 37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05795">arXiv:2502.05795</a> <span> [<a href="https://arxiv.org/pdf/2502.05795">pdf</a>, <a href="https://arxiv.org/format/2502.05795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> The Curse of Depth in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wenfang Sun</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengxiang Li</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Lu Yin</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shiwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05795v1-abstract-short" style="display: inline;"> In this paper, we introduce the Curse of Depth, a concept that highlights, explains, and addresses the recent observation in modern Large Language Models(LLMs) where nearly half of the layers are less effective than expected. We first confirm the wide existence of this phenomenon across the most popular families of LLMs such as Llama, Mistral, DeepSeek, and Qwen. Our analysis, theoretically and em… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05795v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05795v1-abstract-full" style="display: none;"> In this paper, we introduce the Curse of Depth, a concept that highlights, explains, and addresses the recent observation in modern Large Language Models(LLMs) where nearly half of the layers are less effective than expected. We first confirm the wide existence of this phenomenon across the most popular families of LLMs such as Llama, Mistral, DeepSeek, and Qwen. Our analysis, theoretically and empirically, identifies that the underlying reason for the ineffectiveness of deep layers in LLMs is the widespread usage of Pre-Layer Normalization (Pre-LN). While Pre-LN stabilizes the training of Transformer LLMs, its output variance exponentially grows with the model depth, which undesirably causes the derivative of the deep Transformer blocks to be an identity matrix, and therefore barely contributes to the training. To resolve this training pitfall, we propose LayerNorm Scaling, which scales the variance of output of the layer normalization inversely by the square root of its depth. This simple modification mitigates the output variance explosion of deeper Transformer layers, improving their contribution. 
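<p class="is-size-7">A minimal sketch of the scaling rule just described (divide the LayerNorm output of layer $\ell$ by $\sqrt{\ell}$ under Pre-LN); the 1-based layer indexing is an assumption.</p>
<pre>
# Minimal sketch of LayerNorm Scaling: scale the LayerNorm output of layer l
# by 1/sqrt(l) (1-based indexing assumed) to damp Pre-LN variance growth.
import numpy as np

def layer_norm(x, eps=1e-5):
    mu = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mu) / np.sqrt(var + eps)

def scaled_layer_norm(x, layer_index):
    return layer_norm(x) / np.sqrt(layer_index)

x = np.random.randn(2, 16, 512)                      # (batch, seq, hidden)
print(scaled_layer_norm(x, layer_index=24).std())    # about 1/sqrt(24) of plain LN
</pre>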
Our experimental results, spanning model sizes from 130M to 1B, demonstrate that LayerNorm Scaling significantly enhances LLM pre-training performance compared to Pre-LN. Moreover, this improvement seamlessly carries over to supervised fine-tuning. All these gains can be attributed to the fact that LayerNorm Scaling enables deeper layers to contribute more effectively during training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05795v1-abstract-full').style.display = 'none'; document.getElementById('2502.05795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05724">arXiv:2502.05724</a> <span> [<a href="https://arxiv.org/pdf/2502.05724">pdf</a>, <a href="https://arxiv.org/format/2502.05724">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Link Prediction for Directed Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+M">Mingguo He</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yuhe Guo</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yanping Zheng</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhewei Wei</a>, <a href="/search/cs?searchtype=author&query=G%C3%BCnnemann%2C+S">Stephan Günnemann</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xiaokui Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05724v1-abstract-short" style="display: inline;"> Link prediction for directed graphs is a crucial task with diverse real-world applications. Recent advances in embedding methods and Graph Neural Networks (GNNs) have shown promising improvements. However, these methods often lack a thorough analysis of embedding expressiveness and suffer from ineffective benchmarks for a fair evaluation. In this paper, we propose a unified framework to assess the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05724v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05724v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05724v1-abstract-full" style="display: none;"> Link prediction for directed graphs is a crucial task with diverse real-world applications. Recent advances in embedding methods and Graph Neural Networks (GNNs) have shown promising improvements. However, these methods often lack a thorough analysis of embedding expressiveness and suffer from ineffective benchmarks for a fair evaluation. In this paper, we propose a unified framework to assess the expressiveness of existing methods, highlighting the impact of dual embeddings and decoder design on performance.
To address limitations in current experimental setups, we introduce DirLinkBench, a robust new benchmark with comprehensive coverage and standardized evaluation. The results show that current methods struggle to achieve strong performance on the new benchmark, while DiGAE outperforms others overall. We further revisit DiGAE theoretically, showing its graph convolution aligns with GCN on an undirected bipartite graph. Inspired by these insights, we propose a novel spectral directed graph auto-encoder SDGAE that achieves SOTA results on DirLinkBench. Finally, we analyze key factors influencing directed link prediction and highlight open challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05724v1-abstract-full').style.display = 'none'; document.getElementById('2502.05724v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05558">arXiv:2502.05558</a> <span> [<a href="https://arxiv.org/pdf/2502.05558">pdf</a>, <a href="https://arxiv.org/format/2502.05558">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Large Memory Network for Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+H">Hui Lu</a>, <a href="/search/cs?searchtype=author&query=Chai%2C+Z">Zheng Chai</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yuchao Zheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhe Chen</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+D">Deping Xie</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+P">Peng Xu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xun Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Di Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05558v3-abstract-short" style="display: inline;"> Modeling user behavior sequences in recommender systems is essential for understanding user preferences over time, enabling personalized and accurate recommendations for improving user retention and enhancing business values. Despite its significance, there are two challenges for current sequential modeling approaches. 
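<p class="is-size-7">For reference, the "dual embeddings" discussed in this abstract give each node separate source and target vectors, so the score of a directed edge $(u,v)$ need not equal that of $(v,u)$. The sketch below assumes a simple inner-product decoder for illustration; it is not SDGAE.</p>
<pre>
# Minimal sketch of a dual-embedding decoder for directed link prediction:
# each node keeps a source vector s and a target vector t, and the score of
# edge (u, v) is sigmoid(s_u . t_v), which is asymmetric in u and v.
import numpy as np

def edge_score(src_emb, tgt_emb, u, v):
    logit = src_emb[u] @ tgt_emb[v]
    return 1.0 / (1.0 + np.exp(-logit))

num_nodes, dim = 100, 32
src_emb = np.random.randn(num_nodes, dim)
tgt_emb = np.random.randn(num_nodes, dim)
print(edge_score(src_emb, tgt_emb, 3, 7), edge_score(src_emb, tgt_emb, 7, 3))
</pre>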
From the spatial dimension, it is difficult to mutually perceive similar users'… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05558v3-abstract-full').style.display = 'inline'; document.getElementById('2502.05558v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05558v3-abstract-full" style="display: none;"> Modeling user behavior sequences in recommender systems is essential for understanding user preferences over time, enabling personalized and accurate recommendations for improving user retention and enhancing business values. Despite its significance, there are two challenges for current sequential modeling approaches. From the spatial dimension, it is difficult to mutually perceive similar users' interests for a generalized intention understanding; from the temporal dimension, current methods are generally prone to forgetting long-term interests due to the fixed-length input sequence. In this paper, we present Large Memory Network (LMN), providing a novel idea by compressing and storing user history behavior information in a large-scale memory block. With the elaborated online deployment strategy, the memory block can be easily scaled up to million-scale in the industry. Extensive offline comparison experiments, memory scaling up experiments, and online A/B test on Douyin E-Commerce Search (ECS) are performed, validating the superior performance of LMN. Currently, LMN has been fully deployed in Douyin ECS, serving millions of users each day. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05558v3-abstract-full').style.display = 'none'; document.getElementById('2502.05558v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
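<p class="is-size-7">The "large-scale memory block" described above can be pictured as a key-value table read with attention: the user's past behaviour is compressed into memory slots, and a query derived from the current request retrieves a personalized summary. The sketch below is a generic attention read with made-up sizes, not LMN's deployed design or its online serving strategy.</p>
<pre>
# Generic sketch of reading a large memory block with attention: a query
# vector attends over memory keys and returns a weighted sum of memory values.
import numpy as np

def memory_read(query, mem_keys, mem_values):
    """query: (d,); mem_keys, mem_values: (num_slots, d)."""
    scores = mem_keys @ query / np.sqrt(query.shape[0])
    weights = np.exp(scores - scores.max())
    weights = weights / weights.sum()
    return weights @ mem_values              # (d,) compressed interest summary

num_slots, d = 100_000, 64                   # stand-in for a million-scale block
keys = np.random.randn(num_slots, d).astype(np.float32)
values = np.random.randn(num_slots, d).astype(np.float32)
print(memory_read(np.random.randn(d).astype(np.float32), keys, values).shape)
</pre>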
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> WWW 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05523">arXiv:2502.05523</a> <span> [<a href="https://arxiv.org/pdf/2502.05523">pdf</a>, <a href="https://arxiv.org/format/2502.05523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Domain Scaling for Personalized Sequential Modeling in Recommenders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chai%2C+Z">Zheng Chai</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Hui Lu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Di Chen</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Q">Qin Ren</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yuchao Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05523v2-abstract-short" style="display: inline;"> Users generally exhibit complex behavioral patterns and diverse intentions in multiple business scenarios of super applications like Douyin, presenting great challenges to current industrial multi-domain recommenders. To mitigate the discrepancies across diverse domains, research and industrial practices generally emphasize sophisticated network structures to accommodate diverse data distribution… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05523v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05523v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05523v2-abstract-full" style="display: none;"> Users generally exhibit complex behavioral patterns and diverse intentions in multiple business scenarios of super applications like Douyin, presenting great challenges to current industrial multi-domain recommenders. To mitigate the discrepancies across diverse domains, research and industrial practices generally emphasize sophisticated network structures to accommodate diverse data distributions, while neglecting the inherent understanding of user behavioral sequence from the multi-domain perspective. In this paper, we present the Adaptive Domain Scaling (ADS) model, which comprehensively enhances the personalization capability in target-aware sequence modeling across multiple domains. Specifically, ADS comprises two major modules, including personalized sequence representation generation (PSRG) and personalized candidate representation generation (PCRG). The modules contribute to the tailored multi-domain learning by dynamically learning both the user behavioral sequence item representation and the candidate target item representation under different domains, facilitating adaptive user intention understanding. Experiments are performed on both a public dataset and two billion-scaled industrial datasets, and the extensive results verify the high effectiveness and compatibility of ADS.
Besides, we conduct online experiments on two influential business scenarios including Douyin Advertisement Platform and Douyin E-commerce Service Platform, both of which show substantial business improvements. Currently, ADS has been fully deployed in many recommendation services at ByteDance, serving billions of users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05523v2-abstract-full').style.display = 'none'; document.getElementById('2502.05523v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05482">arXiv:2502.05482</a> <span> [<a href="https://arxiv.org/pdf/2502.05482">pdf</a>, <a href="https://arxiv.org/format/2502.05482">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robustifying Fourier Features Embeddings for Implicit Neural Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+M">Mingze Ma</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qingtian Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+Y">Yifan Zhan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Z">Zhengwei Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hongjun Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yinqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05482v1-abstract-short" style="display: inline;"> Implicit Neural Representations (INRs) employ neural networks to represent continuous functions by mapping coordinates to the corresponding values of the target function, with applications e.g., inverse graphics. However, INRs face a challenge known as spectral bias when dealing with scenes containing varying frequencies. To overcome spectral bias, the most common approach is the Fourier features-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05482v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05482v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05482v1-abstract-full" style="display: none;"> Implicit Neural Representations (INRs) employ neural networks to represent continuous functions by mapping coordinates to the corresponding values of the target function, with applications e.g., inverse graphics. However, INRs face a challenge known as spectral bias when dealing with scenes containing varying frequencies. To overcome spectral bias, the most common approach is the Fourier features-based methods such as positional encoding. However, Fourier features-based methods will introduce noise to output, which degrades their performances when applied to downstream tasks. 
In response, this paper initially hypothesizes that combining multi-layer perceptrons (MLPs) with Fourier feature embeddings mutually enhances their strengths, yet simultaneously introduces limitations inherent in Fourier feature embeddings. By presenting a simple theorem, we validate our hypothesis, which serves as a foundation for the design of our solution. Leveraging these insights, we propose the use of multi-layer perceptrons (MLPs) without additive <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05482v1-abstract-full').style.display = 'none'; document.getElementById('2502.05482v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03825">arXiv:2502.03825</a> <span> [<a href="https://arxiv.org/pdf/2502.03825">pdf</a>, <a href="https://arxiv.org/format/2502.03825">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Synthetic Poisoning Attacks: The Impact of Poisoned MRI Image on U-Net Brain Tumor Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianhao Li</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+T">Tianyu Zeng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yujia Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chulong Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haotian Huang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+C">Chuangxin Chu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+F">Fang-Fang Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhenyu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03825v1-abstract-short" style="display: inline;"> Deep learning-based medical image segmentation models, such as U-Net, rely on high-quality annotated datasets to achieve accurate predictions. However, the increasing use of generative models for synthetic data augmentation introduces potential risks, particularly in the absence of rigorous quality control. 
In this paper, we investigate the impact of synthetic MRI data on the robustness and segmen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03825v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03825v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03825v1-abstract-full" style="display: none;"> Deep learning-based medical image segmentation models, such as U-Net, rely on high-quality annotated datasets to achieve accurate predictions. However, the increasing use of generative models for synthetic data augmentation introduces potential risks, particularly in the absence of rigorous quality control. In this paper, we investigate the impact of synthetic MRI data on the robustness and segmentation accuracy of U-Net models for brain tumor segmentation. Specifically, we generate synthetic T1-contrast-enhanced (T1-Ce) MRI scans using a GAN-based model with a shared encoding-decoding framework and shortest-path regularization. To quantify the effect of synthetic data contamination, we train U-Net models on progressively "poisoned" datasets, where synthetic data proportions range from 16.67% to 83.33%. Experimental results on a real MRI validation set reveal a significant performance degradation as synthetic data increases, with Dice coefficients dropping from 0.8937 (33.33% synthetic) to 0.7474 (83.33% synthetic). Accuracy and sensitivity exhibit similar downward trends, demonstrating the detrimental effect of synthetic data on segmentation robustness. These findings underscore the importance of quality control in synthetic data integration and highlight the risks of unregulated synthetic augmentation in medical image analysis. Our study provides critical insights for the development of more reliable and trustworthy AI-driven medical imaging systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03825v1-abstract-full').style.display = 'none'; document.getElementById('2502.03825v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
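<p class="is-size-7">To make the reported "poisoning proportions" and Dice numbers concrete, the sketch below shows how a training set with a chosen synthetic fraction can be assembled and how the Dice coefficient is computed; the array shapes are placeholders, not the MRI volumes used in the paper.</p>
<pre>
# Sketch of (1) mixing real and synthetic samples at a target proportion and
# (2) the Dice coefficient used to quantify segmentation degradation.
import numpy as np

def mix_datasets(real, synthetic, synthetic_fraction, seed=0):
    rng = np.random.default_rng(seed)
    n_total = len(real)
    n_syn = int(round(synthetic_fraction * n_total))
    keep_real = rng.choice(len(real), n_total - n_syn, replace=False)
    take_syn = rng.choice(len(synthetic), n_syn, replace=False)
    return np.concatenate([real[keep_real], synthetic[take_syn]])

def dice(pred_mask, true_mask, eps=1e-7):
    inter = np.logical_and(pred_mask, true_mask).sum()
    return (2.0 * inter + eps) / (pred_mask.sum() + true_mask.sum() + eps)

real = np.zeros((60, 8, 8))          # stand-ins for real scans
synthetic = np.ones((60, 8, 8))      # stand-ins for GAN-generated scans
train = mix_datasets(real, synthetic, synthetic_fraction=1 / 3)   # 33.33% synthetic
print(len(train), dice(np.ones((8, 8), bool), np.ones((8, 8), bool)))
</pre>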
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03501">arXiv:2502.03501</a> <span> [<a href="https://arxiv.org/pdf/2502.03501">pdf</a>, <a href="https://arxiv.org/format/2502.03501">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Proxy Prompt: Endowing SAM and SAM 2 with Auto-Interactive-Prompt for Medical Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xinyi%2C+W">Wang Xinyi</a>, <a href="/search/cs?searchtype=author&query=Hongyu%2C+K">Kang Hongyu</a>, <a href="/search/cs?searchtype=author&query=Peishan%2C+W">Wei Peishan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuai Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yu Sun</a>, <a href="/search/cs?searchtype=author&query=Lam%2C+S+K">Sai Kit Lam</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yongping Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03501v1-abstract-short" style="display: inline;"> In this paper, we aim to address the unmet demand for automated prompting and enhanced human-model interactions of SAM and SAM2 for the sake of promoting their widespread clinical adoption. Specifically, we propose Proxy Prompt (PP), auto-generated by leveraging non-target data with a pre-annotated mask. We devise a novel 3-step context-selection strategy for adaptively selecting the most represen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03501v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03501v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03501v1-abstract-full" style="display: none;"> In this paper, we aim to address the unmet demand for automated prompting and enhanced human-model interactions of SAM and SAM2 for the sake of promoting their widespread clinical adoption. Specifically, we propose Proxy Prompt (PP), auto-generated by leveraging non-target data with a pre-annotated mask. We devise a novel 3-step context-selection strategy for adaptively selecting the most representative contextual information from non-target data via vision mamba and selective maps, empowering the guiding capability of non-target image-mask pairs for segmentation on target image/video data. To reinforce human-model interactions in PP, we further propose a contextual colorization module via a dual-reverse cross-attention to enhance interactions between target features and contextual-embedding with amplifying distinctive features of user-defined object(s). Via extensive evaluations, our method achieves state-of-the-art performance on four public datasets and yields comparable results with fully-trained models, even when trained with only 16 image masks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03501v1-abstract-full').style.display = 'none'; document.getElementById('2502.03501v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02975">arXiv:2502.02975</a> <span> [<a href="https://arxiv.org/pdf/2502.02975">pdf</a>, <a href="https://arxiv.org/format/2502.02975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TGB-Seq Benchmark: Challenging Temporal GNNs with Complex Sequential Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+L">Lu Yi</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jie Peng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yanping Zheng</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhewei Wei</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yuhang Ye</a>, <a href="/search/cs?searchtype=author&query=Zixuan%2C+Y">Yue Zixuan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zengfeng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02975v1-abstract-short" style="display: inline;"> Future link prediction is a fundamental challenge in various real-world dynamic systems. To address this, numerous temporal graph neural networks (temporal GNNs) and benchmark datasets have been developed. However, these datasets often feature excessive repeated edges and lack complex sequential dynamics, a key characteristic inherent in many real-world applications such as recommender systems and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02975v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02975v1-abstract-full" style="display: none;"> Future link prediction is a fundamental challenge in various real-world dynamic systems. To address this, numerous temporal graph neural networks (temporal GNNs) and benchmark datasets have been developed. However, these datasets often feature excessive repeated edges and lack complex sequential dynamics, a key characteristic inherent in many real-world applications such as recommender systems and ``Who-To-Follow'' on social networks. This oversight has led existing methods to inadvertently downplay the importance of learning sequential dynamics, focusing primarily on predicting repeated edges. 
In this study, we demonstrate that existing methods, such as GraphMixer and DyGFormer, are inherently incapable of learning simple sequential dynamics, such as ``a user who has followed OpenAI and Anthropic is more likely to follow AI at Meta next.'' Motivated by this issue, we introduce the Temporal Graph Benchmark with Sequential Dynamics (TGB-Seq), a new benchmark carefully curated to minimize repeated edges, challenging models to learn sequential dynamics and generalize to unseen edges. TGB-Seq comprises large real-world datasets spanning diverse domains, including e-commerce interactions, movie ratings, business reviews, social networks, citation networks and web link networks. Benchmarking experiments reveal that current methods usually suffer significant performance degradation and incur substantial training costs on TGB-Seq, posing new challenges and opportunities for future research. TGB-Seq datasets, leaderboards, and example codes are available at https://tgb-seq.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02975v1-abstract-full').style.display = 'none'; document.getElementById('2502.02975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">published at ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01376">arXiv:2502.01376</a> <span> [<a href="https://arxiv.org/pdf/2502.01376">pdf</a>, <a href="https://arxiv.org/format/2502.01376">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1177/02783649241234364">10.1177/02783649241234364 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Compliance while resisting: a shear-thickening fluid controller for physical human-robot interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lu Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lipeng Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiangchi Chen</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Haojian Lu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yu Zheng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jun Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhengyou Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+R">Rong Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01376v1-abstract-short" style="display: inline;"> Physical human-robot interaction (pHRI) is widely needed in many fields, such 
as industrial manipulation, home services, and medical rehabilitation, and puts higher demands on the safety of robots. Due to the uncertainty of the working environment, the pHRI may receive unexpected impact interference, which affects the safety and smoothness of the task execution. The commonly used linear admittance… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01376v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01376v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01376v1-abstract-full" style="display: none;"> Physical human-robot interaction (pHRI) is widely needed in many fields, such as industrial manipulation, home services, and medical rehabilitation, and puts higher demands on the safety of robots. Due to the uncertainty of the working environment, the pHRI may receive unexpected impact interference, which affects the safety and smoothness of the task execution. The commonly used linear admittance control (L-AC) can cope well with high-frequency small-amplitude noise, but for medium-frequency high-intensity impact, the effect is not as good. Inspired by the solid-liquid phase change nature of shear-thickening fluid, we propose a Shear-thickening Fluid Control (SFC) that can achieve both an easy human-robot collaboration and resistance to impact interference. The SFC's stability, passivity, and phase trajectory are analyzed in detail, the frequency and time domain properties are quantified, and parameter constraints in discrete control and coupled stability conditions are provided. We conducted simulations to compare the frequency and time domain characteristics of L-AC, nonlinear admittance controller (N-AC), and SFC, and validated their dynamic properties. In real-world experiments, we compared the performance of L-AC, N-AC, and SFC in both fixed and mobile manipulators. L-AC exhibits weak resistance to impact. N-AC can resist moderate impacts but not high-intensity ones, and may exhibit self-excited oscillations. In contrast, SFC demonstrated superior impact resistance and maintained stable collaboration, enhancing comfort in cooperative water delivery tasks. Additionally, a case study was conducted in a factory setting, further affirming the SFC's capability in facilitating human-robot collaborative manipulation and underscoring its potential in industrial applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01376v1-abstract-full').style.display = 'none'; document.getElementById('2502.01376v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
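<p class="is-size-7">For intuition about the abstract above: a linear admittance controller maps the sensed external force to a compliant motion through a fixed mass and damping ($M\dot{v} + Dv = F_{ext}$), while the shear-thickening idea can be caricatured by letting the damping grow with the interaction intensity, so the robot stays compliant under gentle pushes but stiffens under impacts. The sketch below is that caricature only, not the paper's SFC law, its parameters, or its stability analysis.</p>
<pre>
# Caricature of admittance control with "shear-thickening" damping: damping
# grows with the magnitude of the external force, so gentle pushes move the
# robot easily while sharp impacts are strongly resisted. Illustrative only.
import numpy as np

def admittance_step(v, f_ext, dt=0.002, mass=2.0, d0=5.0, k_thicken=0.0):
    damping = d0 + k_thicken * abs(f_ext)     # k_thicken = 0 recovers linear A-C
    acc = (f_ext - damping * v) / mass
    return v + acc * dt

v_lin, v_stf, peak_lin, peak_stf = 0.0, 0.0, 0.0, 0.0
for t in np.arange(0.0, 0.5, 0.002):
    f = 80.0 if 0.10 <= t <= 0.12 else 2.0    # brief impact on top of a light push
    v_lin = admittance_step(v_lin, f)                        # linear admittance
    v_stf = admittance_step(v_stf, f, k_thicken=1.5)         # thickening damping
    peak_lin, peak_stf = max(peak_lin, v_lin), max(peak_stf, v_stf)
# The impact deflects the thickened variant noticeably less than the linear one.
print(round(peak_lin, 3), round(peak_stf, 3))
</pre>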
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01004">arXiv:2502.01004</a> <span> [<a href="https://arxiv.org/pdf/2502.01004">pdf</a>, <a href="https://arxiv.org/format/2502.01004">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ZeroBP: Learning Position-Aware Correspondence for Zero-shot 6D Pose Estimation in Bin-Picking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianqiu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zikun Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xin Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Ye Zheng</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+T">Tianpeng Bao</a>, <a href="/search/cs?searchtype=author&query=He%2C+Z">Zhenyu He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01004v1-abstract-short" style="display: inline;"> Bin-picking is a practical and challenging robotic manipulation task, where accurate 6D pose estimation plays a pivotal role. The workpieces in bin-picking are typically textureless and randomly stacked in a bin, which poses a significant challenge to 6D pose estimation. Existing solutions are typically learning-based methods, which require object-specific training. Their efficiency of practical d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01004v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01004v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01004v1-abstract-full" style="display: none;"> Bin-picking is a practical and challenging robotic manipulation task, where accurate 6D pose estimation plays a pivotal role. The workpieces in bin-picking are typically textureless and randomly stacked in a bin, which poses a significant challenge to 6D pose estimation. Existing solutions are typically learning-based methods, which require object-specific training. Their efficiency of practical deployment for novel workpieces is highly limited by data collection and model retraining. Zero-shot 6D pose estimation is a potential approach to address the issue of deployment efficiency. Nevertheless, existing zero-shot 6D pose estimation methods are designed to leverage feature matching to establish point-to-point correspondences for pose estimation, which is less effective for workpieces with textureless appearances and ambiguous local regions. In this paper, we propose ZeroBP, a zero-shot pose estimation framework designed specifically for the bin-picking task. ZeroBP learns Position-Aware Correspondence (PAC) between the scene instance and its CAD model, leveraging both local features and global positions to resolve the mismatch issue caused by ambiguous regions with similar shapes and appearances. Extensive experiments on the ROBI dataset demonstrate that ZeroBP outperforms state-of-the-art zero-shot pose estimation methods, achieving an improvement of 9.1% in average recall of correct poses. 
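<p class="is-size-7">For context on the correspondence-based pipeline this abstract refers to, the standard closed-form step that turns matched model/scene points into a 6D pose is a least-squares rigid alignment (Kabsch/Umeyama). The sketch below shows only that generic step; it is not ZeroBP's position-aware correspondence module.</p>
<pre>
# Generic Kabsch/Umeyama step: given matched 3D points from the CAD model and
# the scene, recover the rigid transform (R, t) aligning them in a
# least-squares sense. Standard correspondence-to-pose step, not ZeroBP's PAC.
import numpy as np

def rigid_align(model_pts, scene_pts):
    """model_pts, scene_pts: (N, 3) matched points; returns R (3, 3), t (3,)."""
    mu_m, mu_s = model_pts.mean(axis=0), scene_pts.mean(axis=0)
    h = (model_pts - mu_m).T @ (scene_pts - mu_s)
    u, _, vt = np.linalg.svd(h)
    d = np.sign(np.linalg.det(vt.T @ u.T))    # guard against reflections
    r = vt.T @ np.diag([1.0, 1.0, d]) @ u.T
    return r, mu_s - r @ mu_m

model = np.random.randn(50, 3)
q = np.linalg.qr(np.random.randn(3, 3))[0]
r_true = q * np.sign(np.linalg.det(q))        # force a proper rotation
scene = model @ r_true.T + np.array([0.1, -0.2, 0.3])
r_est, t_est = rigid_align(model, scene)
print(np.allclose(r_est @ model.T + t_est[:, None], scene.T, atol=1e-6))   # True
</pre>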
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01004v1-abstract-full').style.display = 'none'; document.getElementById('2502.01004v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00639">arXiv:2502.00639</a> <span> [<a href="https://arxiv.org/pdf/2502.00639">pdf</a>, <a href="https://arxiv.org/format/2502.00639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Zeroth-order Informed Fine-Tuning for Diffusion Model: A Recursive Likelihood Ratio Optimizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+T">Tao Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zishi Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zehao Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jingyang Jiang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+S">Shentao Qin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guanghao Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yan Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yi Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinping Li</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+M">Min Zhan</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yijie Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00639v1-abstract-short" style="display: inline;"> The probabilistic diffusion model (DM), generating content by inferencing through a recursive chain structure, has emerged as a powerful framework for visual generation. After pre-training on enormous unlabeled data, the model needs to be properly aligned to meet requirements for downstream applications. How to efficiently align the foundation DM is a crucial task. Contemporary methods are either… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00639v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00639v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00639v1-abstract-full" style="display: none;"> The probabilistic diffusion model (DM), generating content by inferencing through a recursive chain structure, has emerged as a powerful framework for visual generation. 
After pre-training on enormous unlabeled data, the model needs to be properly aligned to meet requirements for downstream applications. How to efficiently align the foundation DM is a crucial task. Contemporary methods are either based on Reinforcement Learning (RL) or truncated Backpropagation (BP). However, RL and truncated BP suffer from low sample efficiency and biased gradient estimation respectively, resulting in limited improvement or, even worse, complete training failure. To overcome the challenges, we propose the Recursive Likelihood Ratio (RLR) optimizer, a zeroth-order informed fine-tuning paradigm for DM. The zeroth-order gradient estimator enables the computation graph rearrangement within the recursive diffusive chain, making the RLR's gradient estimator an unbiased one with the lower variance than other methods. We provide theoretical guarantees for the performance of the RLR. Extensive experiments are conducted on image and video generation tasks to validate the superiority of the RLR. Furthermore, we propose a novel prompt technique that is natural for the RLR to achieve a synergistic effect. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00639v1-abstract-full').style.display = 'none'; document.getElementById('2502.00639v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19347">arXiv:2501.19347</a> <span> [<a href="https://arxiv.org/pdf/2501.19347">pdf</a>, <a href="https://arxiv.org/ps/2501.19347">ps</a>, <a href="https://arxiv.org/format/2501.19347">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> An All-digital 65-nm Tsetlin Machine Image Classification Accelerator with 8.6 nJ per MNIST Frame at 60.3k Frames per Second </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tunheim%2C+S+A">Svein Anders Tunheim</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yujin Zheng</a>, <a href="/search/cs?searchtype=author&query=Jiao%2C+L">Lei Jiao</a>, <a href="/search/cs?searchtype=author&query=Shafik%2C+R">Rishad Shafik</a>, <a href="/search/cs?searchtype=author&query=Yakovlev%2C+A">Alex Yakovlev</a>, <a href="/search/cs?searchtype=author&query=Granmo%2C+O">Ole-Christoffer Granmo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19347v1-abstract-short" style="display: inline;"> We present an all-digital programmable machine learning accelerator chip for image classification, underpinning on the Tsetlin machine (TM) principles. The TM is a machine learning algorithm founded on propositional logic, utilizing sub-pattern recognition expressions called clauses. 
The accelerator implements the coalesced TM version with convolution, and classifies booleanized images of 28… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19347v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19347v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19347v1-abstract-full" style="display: none;"> We present an all-digital programmable machine learning accelerator chip for image classification, underpinned by the Tsetlin machine (TM) principles. The TM is a machine learning algorithm founded on propositional logic, utilizing sub-pattern recognition expressions called clauses. The accelerator implements the coalesced TM version with convolution, and classifies booleanized images of 28$\times$28 pixels with 10 categories. A configuration with 128 clauses is used in a highly parallel architecture. Fast clause evaluation is obtained by keeping all clause weights and Tsetlin automata (TA) action signals in registers. The chip is implemented in a 65 nm low-leakage CMOS technology, and occupies an active area of 2.7 mm$^2$. At a clock frequency of 27.8 MHz, the accelerator achieves 60.3k classifications per second, and consumes 8.6 nJ per classification. The latency for classifying a single image is 25.4 $\mu$s, which includes system timing overhead. The accelerator achieves 97.42%, 84.54% and 82.55% test accuracies for the datasets MNIST, Fashion-MNIST and Kuzushiji-MNIST, respectively, matching the TM software models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19347v1-abstract-full').style.display = 'none'; document.getElementById('2501.19347v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures.
This work has been submitted to the IEEE for possible publication</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15564">arXiv:2501.15564</a> <span> [<a href="https://arxiv.org/pdf/2501.15564">pdf</a>, <a href="https://arxiv.org/format/2501.15564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diffusion-Based Planning for Autonomous Driving with Flexible Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yinan Zheng</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+R">Ruiming Liang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+K">Kexin Zheng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jinliang Zheng</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+L">Liyuan Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+W">Weihao Gu</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+R">Rui Ai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+E">Shengbo Eben Li</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+X">Xianyuan Zhan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15564v2-abstract-short" style="display: inline;"> Achieving human-like driving behaviors in complex open-world environments is a critical challenge in autonomous driving. Contemporary learning-based planning approaches such as imitation learning methods often struggle to balance competing objectives and lack of safety assurance,due to limited adaptability and inadequacy in learning complex multi-modal behaviors commonly exhibited in human plannin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15564v2-abstract-full').style.display = 'inline'; document.getElementById('2501.15564v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15564v2-abstract-full" style="display: none;"> Achieving human-like driving behaviors in complex open-world environments is a critical challenge in autonomous driving. Contemporary learning-based planning approaches such as imitation learning methods often struggle to balance competing objectives and lack of safety assurance,due to limited adaptability and inadequacy in learning complex multi-modal behaviors commonly exhibited in human planning, not to mention their strong reliance on the fallback strategy with predefined rules. We propose a novel transformer-based Diffusion Planner for closed-loop planning, which can effectively model multi-modal driving behavior and ensure trajectory quality without any rule-based refinement. 
Our model supports joint modeling of both prediction and planning tasks under the same architecture, enabling cooperative behaviors between vehicles. Moreover, by learning the gradient of the trajectory score function and employing a flexible classifier guidance mechanism, Diffusion Planner effectively achieves safe and adaptable planning behaviors. Evaluations on the large-scale real-world autonomous planning benchmark nuPlan and our newly collected 200-hour delivery-vehicle driving dataset demonstrate that Diffusion Planner achieves state-of-the-art closed-loop performance with robust transferability in diverse driving styles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15564v2-abstract-full').style.display = 'none'; document.getElementById('2501.15564v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15519">arXiv:2501.15519</a> <span> [<a href="https://arxiv.org/pdf/2501.15519">pdf</a>, <a href="https://arxiv.org/format/2501.15519">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fuzzy-aware Loss for Source-free Domain Adaptation in Visual Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Ying Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yi Wang</a>, <a href="/search/cs?searchtype=author&query=Chau%2C+L">Lap-Pui Chau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15519v1-abstract-short" style="display: inline;"> Source-free domain adaptation in visual emotion recognition (SFDA-VER) is a highly challenging task that requires adapting VER models to the target domain without relying on source data, which is of great significance for data privacy protection. However, due to the unignorable disparities between visual emotion data and traditional image classification data, existing SFDA methods perform poorly o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15519v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15519v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15519v1-abstract-full" style="display: none;"> Source-free domain adaptation in visual emotion recognition (SFDA-VER) is a highly challenging task that requires adapting VER models to the target domain without relying on source data, which is of great significance for data privacy protection. 
However, due to the unignorable disparities between visual emotion data and traditional image classification data, existing SFDA methods perform poorly on this task. In this paper, we investigate the SFDA-VER task from a fuzzy perspective and identify two key issues: fuzzy emotion labels and fuzzy pseudo-labels. These issues arise from the inherent uncertainty of emotion annotations and the potential mispredictions in pseudo-labels. To address these issues, we propose a novel fuzzy-aware loss (FAL) to enable the VER model to better learn and adapt to new domains under fuzzy labels. Specifically, FAL modifies the standard cross entropy loss and focuses on adjusting the losses of non-predicted categories, which prevents a large number of uncertain or incorrect predictions from overwhelming the VER model during adaptation. In addition, we provide a theoretical analysis of FAL and prove its robustness in handling the noise in generated pseudo-labels. Extensive experiments on 26 domain adaptation sub-tasks across three benchmark datasets demonstrate the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15519v1-abstract-full').style.display = 'none'; document.getElementById('2501.15519v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14276">arXiv:2501.14276</a> <span> [<a href="https://arxiv.org/pdf/2501.14276">pdf</a>, <a href="https://arxiv.org/format/2501.14276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Global Semantic-Guided Sub-image Feature Weight Allocation in High-Resolution Large Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xu Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaolei Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haotian Chen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yi Zheng</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+C">Chenghang Lai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bin Li</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+X">Xiangyang Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14276v1-abstract-short" style="display: inline;"> As the demand for high-resolution image processing in Large Vision-Language Models (LVLMs) grows, sub-image partitioning has become a popular approach for mitigating visual information loss associated with fixed-resolution processing. However, existing partitioning methods uniformly process sub-images, resulting in suboptimal image understanding. 
In this work, we reveal that the sub-images with hi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14276v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14276v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14276v1-abstract-full" style="display: none;"> As the demand for high-resolution image processing in Large Vision-Language Models (LVLMs) grows, sub-image partitioning has become a popular approach for mitigating visual information loss associated with fixed-resolution processing. However, existing partitioning methods uniformly process sub-images, resulting in suboptimal image understanding. In this work, we reveal that the sub-images with higher semantic relevance to the entire image encapsulate richer visual information for preserving the model's visual understanding ability. Therefore, we propose the Global Semantic-guided Weight Allocator (GSWA) module, which dynamically allocates weights to sub-images based on their relative information density, emulating human visual attention mechanisms. This approach enables the model to focus on more informative regions, overcoming the limitations of uniform treatment. We integrate GSWA into the InternVL2-2B framework to create SleighVL, a lightweight yet high-performing model. Extensive experiments demonstrate that SleighVL outperforms models with comparable parameters and remains competitive with larger models. Our work provides a promising direction for more efficient and contextually aware high-resolution image processing in LVLMs, advancing multimodal system development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14276v1-abstract-full').style.display = 'none'; document.getElementById('2501.14276v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
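The GSWA entry above (arXiv:2501.14276) describes allocating weights to sub-images according to their semantic relevance to the whole image. The sketch below is only a rough illustration of that idea; the scoring function, the softmax temperature, and how the weights are applied inside InternVL2-2B are assumptions, not details taken from the abstract.

```python
import torch
import torch.nn.functional as F

def semantic_guided_weights(global_emb: torch.Tensor,
                            sub_embs: torch.Tensor,
                            temperature: float = 0.1) -> torch.Tensor:
    """Toy weight allocator: score each sub-image by cosine similarity
    to the global-image embedding and normalize the scores with a softmax.

    global_emb: (d,) pooled embedding of the full image
    sub_embs:   (n, d) pooled embeddings of the n sub-images
    returns:    (n,) weights summing to 1
    """
    g = F.normalize(global_emb, dim=-1)
    s = F.normalize(sub_embs, dim=-1)
    sim = s @ g                                   # (n,) cosine similarities
    return F.softmax(sim / temperature, dim=-1)

if __name__ == "__main__":
    d, n, tokens = 256, 6, 64
    global_emb = torch.randn(d)
    sub_embs = torch.randn(n, d)
    sub_tokens = torch.randn(n, tokens, d)        # per-sub-image visual tokens
    w = semantic_guided_weights(global_emb, sub_embs)
    weighted_tokens = sub_tokens * w[:, None, None]   # emphasize informative crops
    print(round(w.sum().item(), 4))               # ~1.0
```

The point the abstract makes is simply that sub-images are no longer treated uniformly: crops judged more informative receive larger weights before their tokens reach the language model.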
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 10 figures and tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13141">arXiv:2501.13141</a> <span> [<a href="https://arxiv.org/pdf/2501.13141">pdf</a>, <a href="https://arxiv.org/format/2501.13141">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AirRadar: Inferring Nationwide Air Quality in China with Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiongyan Wang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yutong Xia</a>, <a href="/search/cs?searchtype=author&query=ZHong%2C+S">Siru ZHong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Weichuang Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yuankai Wu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+S">Shifen Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junbo Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yu Zheng</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuxuan Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13141v1-abstract-short" style="display: inline;"> Monitoring real-time air quality is essential for safeguarding public health and fostering social progress. However, the widespread deployment of air quality monitoring stations is constrained by their significant costs. To address this limitation, we introduce \emph{AirRadar}, a deep neural network designed to accurately infer real-time air quality in locations lacking monitoring stations by util… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13141v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13141v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13141v1-abstract-full" style="display: none;"> Monitoring real-time air quality is essential for safeguarding public health and fostering social progress. However, the widespread deployment of air quality monitoring stations is constrained by their significant costs. To address this limitation, we introduce \emph{AirRadar}, a deep neural network designed to accurately infer real-time air quality in locations lacking monitoring stations by utilizing data from existing ones. By leveraging learnable mask tokens, AirRadar reconstructs air quality features in unmonitored regions. Specifically, it operates in two stages: first capturing spatial correlations and then adjusting for distribution shifts. We validate AirRadar's efficacy using a year-long dataset from 1,085 monitoring stations across China, demonstrating its superiority over multiple baselines, even with varying degrees of unobserved data. The source code can be accessed at https://github.com/CityMind-Lab/AirRadar. 
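The AirRadar abstract above mentions learnable mask tokens that stand in for unmonitored locations; the actual implementation is in the linked repository. Purely as an illustration of that idea (the transformer backbone, layer sizes, and the location encoding below are invented for the sketch), masked reconstruction over stations might be wired up roughly as follows:

```python
import torch
import torch.nn as nn

class MaskedStationEncoder(nn.Module):
    """Toy sketch of inference with learnable mask tokens: unmonitored
    locations get a shared learnable embedding, and a transformer
    propagates information from observed stations to them."""
    def __init__(self, d_model: int = 64, n_heads: int = 4, n_layers: int = 2):
        super().__init__()
        self.value_proj = nn.Linear(1, d_model)            # embed observed readings
        self.mask_token = nn.Parameter(torch.zeros(1, 1, d_model))
        layer = nn.TransformerEncoderLayer(d_model, n_heads, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, n_layers)
        self.head = nn.Linear(d_model, 1)                  # predict a reading everywhere

    def forward(self, readings, observed_mask, loc_emb):
        # readings: (B, N, 1), observed_mask: (B, N) bool, loc_emb: (B, N, d_model)
        x = self.value_proj(readings)
        x = torch.where(observed_mask[..., None], x, self.mask_token.expand_as(x))
        x = self.encoder(x + loc_emb)                      # capture spatial correlations
        return self.head(x)                                # (B, N, 1) inferred values

if __name__ == "__main__":
    B, N, D = 2, 10, 64
    model = MaskedStationEncoder(D)
    readings = torch.randn(B, N, 1)
    observed = torch.rand(B, N) > 0.3                      # ~70% of stations observed
    loc_emb = torch.randn(B, N, D)                         # assumed location encodings
    print(model(readings, observed, loc_emb).shape)        # torch.Size([2, 10, 1])
```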
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13141v1-abstract-full').style.display = 'none'; document.getElementById('2501.13141v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12948">arXiv:2501.12948</a> <span> [<a href="https://arxiv.org/pdf/2501.12948">pdf</a>, <a href="https://arxiv.org/format/2501.12948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=DeepSeek-AI"> DeepSeek-AI</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+D">Dejian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haowei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Junxiao Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Runxin Xu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qihao Zhu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shirong Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+X">Xiao Bi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaokang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xingkai Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+F">Z. F. Wu</a>, <a href="/search/cs?searchtype=author&query=Gou%2C+Z">Zhibin Gou</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zhihong Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoshu Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Ziyi Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A">Aixin Liu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bingxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Bochao Wu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+B">Bei Feng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chengda Lu</a> , et al. (175 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12948v1-abstract-short" style="display: inline;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. 
DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12948v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12948v1-abstract-full" style="display: none;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'none'; document.getElementById('2501.12948v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12500">arXiv:2501.12500</a> <span> [<a href="https://arxiv.org/pdf/2501.12500">pdf</a>, <a href="https://arxiv.org/format/2501.12500">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Identification of Nonparametric Dynamic Causal Structure and Latent Process in Climate System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fu%2C+M">Minghao Fu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Biwei Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zijian Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yujia Zheng</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+I">Ignavier Ng</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yingyao Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12500v1-abstract-short" style="display: inline;"> The study of learning causal structure with latent variables has advanced the understanding of the world by uncovering causal relationships and latent factors, e.g., Causal Representation Learning (CRL). However, in real-world scenarios, such as those in climate systems, causal relationships are often nonparametric, dynamic, and exist among both observed variables and latent variables. These chall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12500v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12500v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12500v1-abstract-full" style="display: none;"> The study of learning causal structure with latent variables has advanced the understanding of the world by uncovering causal relationships and latent factors, e.g., Causal Representation Learning (CRL). However, in real-world scenarios, such as those in climate systems, causal relationships are often nonparametric, dynamic, and exist among both observed variables and latent variables. These challenges motivate us to consider a general setting in which causal relations are nonparametric and unrestricted in their occurrence, which is unconventional to current methods. To solve this problem, with the aid of 3-measurement in temporal structure, we theoretically show that both latent variables and processes can be identified up to minor indeterminacy under mild assumptions. Moreover, we tackle the general nonlinear Causal Discovery (CD) from observations, e.g., temperature, as a specific task of learning independent representation, through the principle of functional equivalence. Based on these insights, we develop an estimation approach simultaneously recovering both the observed causal structure and latent causal process in a nontrivial manner. Simulation studies validate the theoretical foundations and demonstrate the effectiveness of the proposed methodology. 
In the experiments involving climate data, this approach offers a powerful and in-depth understanding of the climate system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12500v1-abstract-full').style.display = 'none'; document.getElementById('2501.12500v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12326">arXiv:2501.12326</a> <span> [<a href="https://arxiv.org/pdf/2501.12326">pdf</a>, <a href="https://arxiv.org/format/2501.12326">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> UI-TARS: Pioneering Automated GUI Interaction with Native Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yujia Qin</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yining Ye</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+J">Junjie Fang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoming Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+S">Shihao Liang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+S">Shizuo Tian</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junda Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahao Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunxin Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shijue Huang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+W">Wanjun Zhong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kuanye Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiale Yang</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+Y">Yu Miao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Woyu Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Longxiang Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xu Jiang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Q">Qianli Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jingyu Li</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xiaojun Xiao</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+K">Kai Cai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chuang Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yaowei Zheng</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+C">Chaolin Jin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chen Li</a> , et al. 
(10 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12326v1-abstract-short" style="display: inline;"> This paper introduces UI-TARS, a native GUI agent model that solely perceives the screenshots as input and performs human-like interactions (e.g., keyboard and mouse operations). Unlike prevailing agent frameworks that depend on heavily wrapped commercial models (e.g., GPT-4o) with expert-crafted prompts and workflows, UI-TARS is an end-to-end model that outperforms these sophisticated frameworks.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12326v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12326v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12326v1-abstract-full" style="display: none;"> This paper introduces UI-TARS, a native GUI agent model that solely perceives the screenshots as input and performs human-like interactions (e.g., keyboard and mouse operations). Unlike prevailing agent frameworks that depend on heavily wrapped commercial models (e.g., GPT-4o) with expert-crafted prompts and workflows, UI-TARS is an end-to-end model that outperforms these sophisticated frameworks. Experiments demonstrate its superior performance: UI-TARS achieves SOTA performance in 10+ GUI agent benchmarks evaluating perception, grounding, and GUI task execution. Notably, in the OSWorld benchmark, UI-TARS achieves scores of 24.6 with 50 steps and 22.7 with 15 steps, outperforming Claude (22.0 and 14.9 respectively). In AndroidWorld, UI-TARS achieves 46.6, surpassing GPT-4o (34.5). UI-TARS incorporates several key innovations: (1) Enhanced Perception: leveraging a large-scale dataset of GUI screenshots for context-aware understanding of UI elements and precise captioning; (2) Unified Action Modeling, which standardizes actions into a unified space across platforms and achieves precise grounding and interaction through large-scale action traces; (3) System-2 Reasoning, which incorporates deliberate reasoning into multi-step decision making, involving multiple reasoning patterns such as task decomposition, reflection thinking, milestone recognition, etc. (4) Iterative Training with Reflective Online Traces, which addresses the data bottleneck by automatically collecting, filtering, and reflectively refining new interaction traces on hundreds of virtual machines. Through iterative training and reflection tuning, UI-TARS continuously learns from its mistakes and adapts to unforeseen situations with minimal human intervention. We also analyze the evolution path of GUI agents to guide the further development of this domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12326v1-abstract-full').style.display = 'none'; document.getElementById('2501.12326v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
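The UI-TARS entry above highlights "Unified Action Modeling", which standardizes actions into one space across platforms. The schema below is a hypothetical illustration of what such a normalized action record could look like; it is not the model's actual action vocabulary, and all field names are assumptions.

```python
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple

class ActionType(Enum):
    CLICK = "click"
    TYPE = "type"
    SCROLL = "scroll"
    KEY = "key"

@dataclass
class UnifiedAction:
    """A hypothetical cross-platform action record. Coordinates are
    normalized to [0, 1] so the same action applies to desktop and
    mobile screenshots of different resolutions."""
    kind: ActionType
    point: Optional[Tuple[float, float]] = None   # normalized (x, y) target
    text: Optional[str] = None                    # payload for TYPE / KEY
    delta: Optional[Tuple[float, float]] = None   # scroll offsets

def to_pixels(action: UnifiedAction, width: int, height: int) -> Optional[Tuple[int, int]]:
    """Translate a normalized target back into device pixels for execution."""
    if action.point is None:
        return None
    x, y = action.point
    return round(x * width), round(y * height)

if __name__ == "__main__":
    a = UnifiedAction(ActionType.CLICK, point=(0.42, 0.87))
    print(to_pixels(a, 1920, 1080))   # (806, 940) on a desktop screenshot
    print(to_pixels(a, 1080, 2400))   # (454, 2088) on a phone screenshot
```

Normalizing coordinates is one simple way the same grounded action can be replayed on screens of different sizes; the paper's actual grounding and execution pipeline is not described at this level in the abstract.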
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11241">arXiv:2501.11241</a> <span> [<a href="https://arxiv.org/pdf/2501.11241">pdf</a>, <a href="https://arxiv.org/format/2501.11241">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Irony in Emojis: A Comparative Study of Human and LLM Interpretation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yawen Zheng</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+H">Hanjia Lyu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+J">Jiebo Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11241v1-abstract-short" style="display: inline;"> Emojis have become a universal language in online communication, often carrying nuanced and context-dependent meanings. Among these, irony poses a significant challenge for Large Language Models (LLMs) due to its inherent incongruity between appearance and intent. This study examines the ability of GPT-4o to interpret irony in emojis. By prompting GPT-4o to evaluate the likelihood of specific emoj… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11241v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11241v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11241v1-abstract-full" style="display: none;"> Emojis have become a universal language in online communication, often carrying nuanced and context-dependent meanings. Among these, irony poses a significant challenge for Large Language Models (LLMs) due to its inherent incongruity between appearance and intent. This study examines the ability of GPT-4o to interpret irony in emojis. By prompting GPT-4o to evaluate the likelihood of specific emojis being used to express irony on social media and comparing its interpretations with human perceptions, we aim to bridge the gap between machine and human understanding. Our findings reveal nuanced insights into GPT-4o's interpretive capabilities, highlighting areas of alignment with and divergence from human behavior. Additionally, this research underscores the importance of demographic factors, such as age and gender, in shaping emoji interpretation and evaluates how these factors influence GPT-4o's performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11241v1-abstract-full').style.display = 'none'; document.getElementById('2501.11241v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
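The emoji-irony study above compares GPT-4o's likelihood judgments with human perceptions. A minimal way to quantify such agreement is a rank correlation over per-emoji scores; the emojis and numbers below are invented placeholders, not data from the paper.

```python
import numpy as np
from scipy.stats import spearmanr

# Hypothetical likelihood ratings (0-1) that each emoji is used ironically.
emojis = ["slightly smiling face", "thumbs up", "face with tears of joy",
          "upside-down face", "skull"]
human_scores = np.array([0.55, 0.40, 0.20, 0.80, 0.65])   # aggregated human ratings
model_scores = np.array([0.50, 0.30, 0.25, 0.85, 0.70])   # model-assigned ratings

rho, p = spearmanr(human_scores, model_scores)
print(f"Spearman rho = {rho:.2f} (p = {p:.3f})")
# Largest human-model gaps first, to surface where interpretations diverge.
for e, h, m in sorted(zip(emojis, human_scores, model_scores),
                      key=lambda t: -abs(t[1] - t[2])):
    print(f"{e}: human {h:.2f} vs model {m:.2f} (gap {abs(h - m):.2f})")
```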
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11214">arXiv:2501.11214</a> <span> [<a href="https://arxiv.org/pdf/2501.11214">pdf</a>, <a href="https://arxiv.org/format/2501.11214">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Spatial Disparity in Urban Prediction Using Residual-Aware Spatiotemporal Graph Neural Networks: A Chicago Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhuang%2C+D">Dingyi Zhuang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hanyong Xu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xiaotong Guo</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yunhan Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shenhao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jinhua Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11214v1-abstract-short" style="display: inline;"> Urban prediction tasks, such as forecasting traffic flow, temperature, and crime rates, are crucial for efficient urban planning and management. However, existing Spatiotemporal Graph Neural Networks (ST-GNNs) often rely solely on accuracy, overlooking spatial and demographic disparities in their predictions. This oversight can lead to imbalanced resource allocation and exacerbate existing inequit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11214v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11214v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11214v1-abstract-full" style="display: none;"> Urban prediction tasks, such as forecasting traffic flow, temperature, and crime rates, are crucial for efficient urban planning and management. However, existing Spatiotemporal Graph Neural Networks (ST-GNNs) often rely solely on accuracy, overlooking spatial and demographic disparities in their predictions. This oversight can lead to imbalanced resource allocation and exacerbate existing inequities in urban areas. This study introduces a Residual-Aware Attention (RAA) Block and an equality-enhancing loss function to address these disparities. By adapting the adjacency matrix during training and incorporating spatial disparity metrics, our approach aims to reduce local segregation of residuals and errors. We applied our methodology to urban prediction tasks in Chicago, utilizing a travel demand dataset as an example. Our model achieved a 48% significant improvement in fairness metrics with only a 9% increase in error metrics. Spatial analysis of residual distributions revealed that models with RAA Blocks produced more equitable prediction results, particularly by reducing errors clustered in central regions. Attention maps demonstrated the model's ability to dynamically adjust focus, leading to more balanced predictions. 
Case studies of various community areas in Chicago further illustrated the effectiveness of our approach in addressing spatial and demographic disparities, supporting more balanced and equitable urban planning and policy-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11214v1-abstract-full').style.display = 'none'; document.getElementById('2501.11214v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10105">arXiv:2501.10105</a> <span> [<a href="https://arxiv.org/pdf/2501.10105">pdf</a>, <a href="https://arxiv.org/format/2501.10105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Universal Actions for Enhanced Embodied Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jinliang Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dongxiu Liu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yinan Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhihao Wang</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+Z">Zhonghong Ou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+X">Xianyuan Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10105v1-abstract-short" style="display: inline;"> Training on diverse, internet-scale data is a key factor in the success of recent large foundation models. Yet, using the same recipe for building embodied agents has faced noticeable difficulties. Despite the availability of many crowd-sourced embodied datasets, their action spaces often exhibit significant heterogeneity due to distinct physical embodiment and control interfaces for different rob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10105v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10105v1-abstract-full" style="display: none;"> Training on diverse, internet-scale data is a key factor in the success of recent large foundation models. Yet, using the same recipe for building embodied agents has faced noticeable difficulties. 
Despite the availability of many crowd-sourced embodied datasets, their action spaces often exhibit significant heterogeneity due to distinct physical embodiment and control interfaces for different robots, causing substantial challenges in developing embodied foundation models using cross-domain data. In this paper, we introduce UniAct, a new embodied foundation modeling framework operating in a tokenized Universal Action Space. Our learned universal actions capture the generic atomic behaviors across diverse robots by exploiting their shared structural features, and enable enhanced cross-domain data utilization and cross-embodiment generalizations by eliminating the notorious heterogeneity. The universal actions can be efficiently translated back to heterogeneous actionable commands by simply adding embodiment-specific details, from which fast adaptation to new robots becomes simple and straightforward. Our 0.5B instantiation of UniAct outperforms 14X larger SOTA embodied foundation models in extensive evaluations on various real-world and simulation robots, showcasing exceptional cross-embodiment control and adaptation capability, highlighting the crucial benefit of adopting universal actions. Project page: https://github.com/2toinf/UniAct <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10105v1-abstract-full').style.display = 'none'; document.getElementById('2501.10105v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09412">arXiv:2501.09412</a> <span> [<a href="https://arxiv.org/pdf/2501.09412">pdf</a>, <a href="https://arxiv.org/format/2501.09412">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FASP: Fast and Accurate Structured Pruning of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+H">Hanyu Hu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+P">Pengxiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yi Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhefeng Wang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+X">Xiaoming Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09412v1-abstract-short" style="display: inline;"> The rapid increase in the size of large language models (LLMs) has significantly escalated their computational and memory demands, posing challenges for efficient deployment, especially on resource-constrained devices. Structured pruning has emerged as an effective model compression method that can reduce these demands while preserving performance. 
In this paper, we introduce FASP (Fast and Accura… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09412v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09412v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09412v1-abstract-full" style="display: none;"> The rapid increase in the size of large language models (LLMs) has significantly escalated their computational and memory demands, posing challenges for efficient deployment, especially on resource-constrained devices. Structured pruning has emerged as an effective model compression method that can reduce these demands while preserving performance. In this paper, we introduce FASP (Fast and Accurate Structured Pruning), a novel structured pruning framework for LLMs that emphasizes both speed and accuracy. FASP employs a distinctive pruning structure that interlinks sequential layers, allowing for the removal of columns in one layer while simultaneously eliminating corresponding rows in the preceding layer without incurring additional performance loss. The pruning metric, inspired by Wanda, is computationally efficient and effectively selects components to prune. Additionally, we propose a restoration mechanism that enhances model fidelity by adjusting the remaining weights post-pruning. We evaluate FASP on the OPT and LLaMA model families, demonstrating superior performance in terms of perplexity and accuracy on downstream tasks compared to state-of-the-art methods. Our approach achieves significant speed-ups, pruning models such as OPT-125M in 17 seconds and LLaMA-30B in 15 minutes on a single NVIDIA RTX 4090 GPU, making it a highly practical solution for optimizing LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09412v1-abstract-full').style.display = 'none'; document.getElementById('2501.09412v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
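The FASP abstract above spells out the structural trick: dropping a hidden channel removes a column of one linear layer and the matching row of the layer before it, so the two layers stay shape-consistent with no extra surgery. Below is a minimal sketch of that coupled pruning on a two-layer MLP; the Wanda-style importance score and the calibration statistics are assumptions rather than the exact FASP metric, and FASP's weight-restoration step is omitted.

```python
import torch
import torch.nn as nn

def prune_hidden_channels(fc1: nn.Linear, fc2: nn.Linear,
                          act_norm: torch.Tensor, keep: int):
    """Jointly prune hidden units of an MLP block (fc1 -> act -> fc2).

    Dropping hidden unit j removes row j of fc1.weight (its output) and
    column j of fc2.weight (the matching input).  Channels are scored with
    a Wanda-style importance |W| * ||activation||, summed over fc2's rows
    (an assumption here, not the published metric).
    """
    # act_norm: (hidden,) L2 norm of each hidden activation over calibration data
    score = (fc2.weight.abs() * act_norm[None, :]).sum(dim=0)   # (hidden,)
    keep_idx = torch.topk(score, keep).indices.sort().values

    new_fc1 = nn.Linear(fc1.in_features, keep, bias=fc1.bias is not None)
    new_fc2 = nn.Linear(keep, fc2.out_features, bias=fc2.bias is not None)
    with torch.no_grad():
        new_fc1.weight.copy_(fc1.weight[keep_idx])        # keep selected rows
        if fc1.bias is not None:
            new_fc1.bias.copy_(fc1.bias[keep_idx])
        new_fc2.weight.copy_(fc2.weight[:, keep_idx])     # keep matching columns
        if fc2.bias is not None:
            new_fc2.bias.copy_(fc2.bias)
    return new_fc1, new_fc2

if __name__ == "__main__":
    fc1, fc2 = nn.Linear(16, 64), nn.Linear(64, 16)
    act_norm = torch.rand(64)                  # from a calibration pass (assumed)
    p1, p2 = prune_hidden_channels(fc1, fc2, act_norm, keep=32)
    x = torch.randn(4, 16)
    print(p2(torch.relu(p1(x))).shape)         # torch.Size([4, 16])
```

Because the kept indices are shared between the two layers, the pruned block is a drop-in replacement with a smaller hidden width.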
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08443">arXiv:2501.08443</a> <span> [<a href="https://arxiv.org/pdf/2501.08443">pdf</a>, <a href="https://arxiv.org/format/2501.08443">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Instruction-Guided Fusion of Multi-Layer Visual Features in Large Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xu Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yi Zheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haotian Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaolei Chen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+C">Chenghang Lai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bin Li</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+X">Xiangyang Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08443v3-abstract-short" style="display: inline;"> Large Vision-Language Models (LVLMs) have achieved remarkable success in a wide range of multimodal tasks by integrating pre-trained vision encoders and large language models. However, current LVLMs primarily rely on visual features extracted from the final layers of the vision encoder, overlooking the complementary information available in shallower layers. While recent approaches have explored t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08443v3-abstract-full').style.display = 'inline'; document.getElementById('2501.08443v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08443v3-abstract-full" style="display: none;"> Large Vision-Language Models (LVLMs) have achieved remarkable success in a wide range of multimodal tasks by integrating pre-trained vision encoders and large language models. However, current LVLMs primarily rely on visual features extracted from the final layers of the vision encoder, overlooking the complementary information available in shallower layers. While recent approaches have explored the use of multilayer visual features in LVLMs, they tend to be task-agnostic and fail to examine the dependencies of hierarchical visual features on specific tasks. To address these gaps, we systematically investigate the contributions of visual features from different encoder layers using 18 benchmarks spanning 6 task categories. Our findings reveal that multilayer features provide complementary strengths with varying task dependencies, and uniform fusion leads to suboptimal performance. Building on these insights, we propose the instruction-guided vision aggregator, a module that dynamically integrates multi-layer visual features based on textual instructions, without increasing the number of visual tokens. Extensive evaluations demonstrate the superior performance of our method. 
Additionally, an in-depth analysis of the aggregator's behavior highlights the dominance of mid-to-high-level features in semantic-rich tasks and the critical role of low-level features in fine-grained perception. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08443v3-abstract-full').style.display = 'none'; document.getElementById('2501.08443v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07378">arXiv:2501.07378</a> <span> [<a href="https://arxiv.org/pdf/2501.07378">pdf</a>, <a href="https://arxiv.org/format/2501.07378">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FedSemiDG: Domain Generalized Federated Semi-supervised Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+Z">Zhipeng Deng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhe Xu</a>, <a href="/search/cs?searchtype=author&query=Isshiki%2C+T">Tsuyoshi Isshiki</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07378v1-abstract-short" style="display: inline;"> Medical image segmentation is challenging due to the diversity of medical images and the lack of labeled data, which motivates recent developments in federated semi-supervised learning (FSSL) to leverage a large amount of unlabeled data from multiple centers for model training without sharing raw data. However, what remains under-explored in FSSL is the domain shift problem which may cause subopti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07378v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07378v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07378v1-abstract-full" style="display: none;"> Medical image segmentation is challenging due to the diversity of medical images and the lack of labeled data, which motivates recent developments in federated semi-supervised learning (FSSL) to leverage a large amount of unlabeled data from multiple centers for model training without sharing raw data. However, what remains under-explored in FSSL is the domain shift problem which may cause suboptimal model aggregation and low effectivity of the utilization of unlabeled data, eventually leading to unsatisfactory performance in unseen domains. In this paper, we explore this previously ignored scenario, namely domain generalized federated semi-supervised learning (FedSemiDG), which aims to learn a model in a distributed manner from multiple domains with limited labeled data and abundant unlabeled data such that the model can generalize well to unseen domains. 

arXiv:2501.06469 [pdf, other] cs.CV
SP-SLAM: Neural Real-Time Dense SLAM With Scene Priors
Authors: Zhen Hong, Bowen Wang, Haoran Duan, Yawen Huang, Xiong Li, Zhenyu Wen, Xiang Wu, Wei Xiang, Yefeng Zheng
Abstract: Neural implicit representations have recently shown promising progress in dense Simultaneous Localization And Mapping (SLAM). However, existing works have shortcomings in terms of reconstruction quality and real-time performance, mainly due to an inflexible scene representation strategy that does not leverage any prior information. In this paper, we introduce SP-SLAM, a novel neural RGB-D SLAM system that performs tracking and mapping in real time. SP-SLAM computes depth images and establishes sparse voxel-encoded scene priors near the surfaces to achieve rapid convergence of the model. Subsequently, the encoding voxels computed from a single-frame depth image are fused into a global volume, which facilitates high-fidelity surface reconstruction. Simultaneously, we employ tri-planes to store scene appearance information, striking a balance between high-quality geometric texture mapping and low memory consumption. Furthermore, in SP-SLAM we introduce an effective optimization strategy for mapping, allowing the system to continuously optimize the poses of all historical input frames during runtime without increasing computational overhead. We conduct extensive evaluations on five benchmark datasets (Replica, ScanNet, TUM RGB-D, Synthetic RGB-D, 7-Scenes). The results demonstrate that, compared to existing methods, we achieve superior tracking accuracy and reconstruction quality, while running at a significantly faster speed.
Submitted 11 January, 2025; originally announced January 2025.

arXiv:2501.06430 [pdf, other] cs.CV
Open Eyes, Then Reason: Fine-grained Visual Mathematical Understanding in MLLMs
Authors: Shan Zhang, Aotian Chen, Yanpeng Sun, Jindong Gu, Yi-Yu Zheng, Piotr Koniusz, Kai Zou, Anton van den Hengel, Yuan Xue
Abstract: Current multimodal large language models (MLLMs) often underperform on mathematical problem-solving tasks that require fine-grained visual understanding. The limitation is largely attributable to inadequate perception of geometric primitives during image-level contrastive pre-training (e.g., CLIP). While recent efforts to improve math MLLMs have focused on scaling up mathematical visual instruction datasets and employing stronger LLM backbones, they often overlook persistent errors in visual recognition. In this paper, we systematically evaluate the visual grounding capabilities of state-of-the-art MLLMs and reveal a significant negative correlation between visual grounding accuracy and problem-solving performance, underscoring the critical role of fine-grained visual understanding. Notably, advanced models like GPT-4o exhibit a 70% error rate when identifying geometric entities, highlighting that this remains a key bottleneck in visual mathematical reasoning. To address this, we propose a novel approach, SVE-Math (Selective Vision-Enhanced Mathematical MLLM), featuring a geometric-grounded vision encoder and a feature router that dynamically adjusts the contribution of hierarchical visual feature maps. Our model recognizes accurate visual primitives and generates precise visual prompts tailored to the language model's reasoning needs. In experiments, SVE-Math-Qwen2.5-7B outperforms other 7B models by 15% on MathVerse and is comparable to GPT-4V on MathVista. Despite being trained on smaller datasets, SVE-Math-7B achieves competitive performance on GeoQA, rivaling models trained on significantly larger datasets. Our findings emphasize the importance of incorporating fine-grained visual understanding into MLLMs and provide a promising direction for future research.
Submitted 10 January, 2025; originally announced January 2025.
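
The feature router in SVE-Math is described only as dynamically adjusting the contribution of hierarchical visual feature maps. A minimal numpy sketch of one plausible reading, a softmax gate over globally pooled multi-level features, is given below; the shapes, the pooling choice, and the router parameterization are assumptions rather than the paper's design.

```python
import numpy as np

def route_hierarchical_features(feature_maps, gate_w, gate_b):
    """Fuse multi-level feature maps with input-dependent gates (illustrative).

    feature_maps: list of L arrays, each (C, H, W), already projected to a
                  common channel size C and spatial size (H, W).
    gate_w, gate_b: router parameters mapping pooled stats (L*C,) -> L logits.
    """
    pooled = np.concatenate([f.mean(axis=(1, 2)) for f in feature_maps])  # (L*C,)
    logits = gate_w @ pooled + gate_b                                     # (L,)
    gates = np.exp(logits - logits.max())
    gates /= gates.sum()
    fused = sum(g * f for g, f in zip(gates, feature_maps))
    return fused, gates

# Toy usage: three levels, common shape (8, 16, 16).
rng = np.random.default_rng(0)
levels = [rng.standard_normal((8, 16, 16)) for _ in range(3)]
W = rng.standard_normal((3, 3 * 8)) * 0.1
b = np.zeros(3)
fused, gates = route_hierarchical_features(levels, W, b)
print(gates, fused.shape)
```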

arXiv:2501.04477 [pdf, other] cs.CV
Rethinking High-speed Image Reconstruction Framework with Spike Camera
Authors: Kang Chen, Yajing Zheng, Tiejun Huang, Zhaofei Yu
Abstract: Spike cameras, as innovative neuromorphic devices, generate continuous spike streams to capture high-speed scenes with lower bandwidth and higher dynamic range than traditional RGB cameras. However, reconstructing high-quality images from the spike input under low-light conditions remains challenging. Conventional learning-based methods often rely on synthetic datasets as the supervision for training, but these approaches falter when dealing with noisy spikes fired in low-light environments, leading to further performance degradation on real-world data. This phenomenon is primarily due to inadequate noise modelling and the domain gap between synthetic and real datasets, resulting in recovered images with unclear textures, excessive noise, and diminished brightness. To address these challenges, we introduce SpikeCLIP, a novel spike-to-image reconstruction framework that goes beyond traditional training paradigms. Leveraging the CLIP model's powerful capability to align text and images, we incorporate the textual description of the captured scene and unpaired high-quality datasets as the supervision. Our experiments on the real-world low-light datasets U-CALTECH and U-CIFAR demonstrate that SpikeCLIP significantly enhances texture details and the luminance balance of recovered images. Furthermore, the reconstructed images are well-aligned with the broader visual features needed for downstream tasks, ensuring more robust and versatile performance in challenging environments.
Submitted 8 January, 2025; originally announced January 2025.
Comments: Accepted by AAAI 2025

arXiv:2501.03072 [pdf, other] cs.IR
OpenTable data with multi-criteria ratings
Authors: Yong Zheng
Abstract: With the development of recommender systems (RSs), several promising systems have emerged, such as context-aware RS, multi-criteria RS, and group RS. Multi-criteria recommender systems (MCRSs) are designed to provide personalized recommendations by considering user preferences in multiple attributes or criteria simultaneously. Unlike traditional RSs that typically focus on a single rating, these systems help users make more informed decisions by considering their diverse preferences and needs across various dimensions. In this article, we release the OpenTable data set, which was crawled from OpenTable.com. The data set can serve as a benchmark data set for multi-criteria recommendations.
Submitted 21 November, 2024; originally announced January 2025.
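
The abstract does not describe the released file layout, so the snippet below only illustrates what working with a multi-criteria ratings table typically looks like; the column names (user_id, item_id, overall, food, service, ambience, value) are hypothetical placeholders, not the actual schema of the OpenTable data set.

```python
import pandas as pd

# Hypothetical layout: one row per (user, restaurant) with an overall rating
# plus several criterion ratings, mirroring a typical multi-criteria data set.
ratings = pd.DataFrame({
    "user_id":  [1, 1, 2, 3],
    "item_id":  [10, 11, 10, 12],
    "overall":  [4, 5, 3, 4],
    "food":     [5, 5, 3, 4],
    "service":  [4, 5, 2, 4],
    "ambience": [3, 4, 3, 5],
    "value":    [4, 4, 3, 3],
})

criteria = ["food", "service", "ambience", "value"]
# How strongly does each criterion track the overall rating?
print(ratings[criteria + ["overall"]].corr()["overall"][criteria])
# Per-restaurant criterion means, a typical starting point for an MCRS baseline.
print(ratings.groupby("item_id")[criteria].mean())
```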

arXiv:2501.02803 [pdf, other] cs.RO, cs.AI
Enhancing Lifelong Multi-Agent Path Finding with Cache Mechanism
Authors: Yimin Tang, Zhenghong Yu, Yi Zheng, T. K. Satish Kumar, Jiaoyang Li, Sven Koenig
Abstract: Multi-Agent Path Finding (MAPF), which focuses on finding collision-free paths for multiple robots, is crucial in autonomous warehouse operations. Lifelong MAPF (L-MAPF), where agents are continuously reassigned new targets upon completing their current tasks, offers a more realistic approximation of real-world warehouse scenarios. While cache storage systems can enhance efficiency and reduce operational costs, existing approaches primarily rely on expectations and mathematical models, often without adequately addressing the challenges of multi-robot planning and execution. In this paper, we introduce a novel mechanism called Lifelong MAPF with Cache Mechanism (L-MAPF-CM), which integrates high-level cache storage with low-level path planning. We introduce a new type of map grid, called cache, for temporary item storage. Additionally, we use a task assigner (TA) with a locking mechanism to bridge the gap between the new cache grid and the L-MAPF algorithm. The TA dynamically allocates target locations to agents based on their status in various scenarios. We evaluated L-MAPF-CM using different cache replacement policies and task distributions, and it demonstrated performance improvements, particularly with high cache hit rates and smooth traffic conditions.
Submitted 6 January, 2025; originally announced January 2025.
Comments: arXiv admin note: substantial text overlap with arXiv:2403.13421
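
The abstract introduces cache map cells and a task assigner (TA) with a locking mechanism without specifying the assignment rules. The toy sketch below assumes one lock per cache cell and a simple policy, serve from the cache on a hit, otherwise fetch from the shelf and reserve a free cell for insertion; it is meant only to make the cache/TA interaction concrete, not to reproduce L-MAPF-CM.

```python
class CacheTaskAssigner:
    """Toy task assigner for L-MAPF with a cache zone (rules are illustrative)."""

    def __init__(self, cache_cells):
        self.cache = {c: None for c in cache_cells}    # cell -> stored item
        self.locked = {c: False for c in cache_cells}  # one lock per cache cell

    def assign(self, item, shelf_loc):
        """Return (target_location, kind) for a delivery task of `item`."""
        # Cache hit: send the agent to the (unlocked) cache cell holding the item.
        for cell, stored in self.cache.items():
            if stored == item and not self.locked[cell]:
                self.locked[cell] = True
                return cell, "cache-hit"
        # Cache miss: fetch from the shelf; reserve a free cell for insertion.
        for cell, stored in self.cache.items():
            if stored is None and not self.locked[cell]:
                self.locked[cell] = True
                self.cache[cell] = item
                return shelf_loc, "cache-miss-insert"
        return shelf_loc, "cache-miss"

    def release(self, cell):
        if cell in self.locked:
            self.locked[cell] = False

ta = CacheTaskAssigner(cache_cells=[(0, 0), (0, 1)])
print(ta.assign("A", shelf_loc=(5, 5)))  # miss; item A gets cached at (0, 0)
ta.release((0, 0))
print(ta.assign("A", shelf_loc=(5, 5)))  # hit at (0, 0)
```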

arXiv:2501.00758 [pdf, other] cs.CV
Less is More: Token Context-aware Learning for Object Tracking
Authors: Chenlong Xu, Bineng Zhong, Qihua Liang, Yaozong Zheng, Guorong Li, Shuxiang Song
Abstract: Recently, several studies have shown that utilizing contextual information to perceive target states is crucial for object tracking. They typically capture context by incorporating multiple video frames. However, these naive frame-context methods fail to consider the importance of each patch within a reference frame, making them susceptible to noise and redundant tokens, which deteriorates tracking performance. To address this challenge, we propose a new token context-aware tracking pipeline named LMTrack, designed to automatically learn high-quality reference tokens for efficient visual tracking. Embracing the principle of Less is More, the core idea of LMTrack is to analyze the importance distribution of all reference tokens, where important tokens are collected, continually attended to, and updated. Specifically, a novel Token Context Memory module is designed to dynamically collect high-quality spatio-temporal information of a target in an autoregressive manner, eliminating redundant background tokens from the reference frames. Furthermore, an effective Unidirectional Token Attention mechanism is designed to establish dependencies between reference tokens and the search frame, enabling robust cross-frame association and target localization. Extensive experiments demonstrate the superiority of our tracker, achieving state-of-the-art results on tracking benchmarks such as GOT-10K, TrackingNet, and LaSOT.
Submitted 1 January, 2025; originally announced January 2025.
Comments: Accepted by AAAI 2025
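
LMTrack's Token Context Memory is described as collecting important reference tokens, but the scoring rule is not given in the abstract. The sketch below illustrates the generic "keep only the most important reference tokens" step as top-k selection over mean attention received, which is an assumed proxy for the importance distribution mentioned above.

```python
import numpy as np

def select_reference_tokens(tokens, attn, keep_ratio=0.25):
    """Keep the most-attended reference tokens (illustrative selection rule).

    tokens: (N, D) reference token embeddings
    attn:   (Q, N) attention weights from query (search-frame) tokens to references
    """
    importance = attn.mean(axis=0)                  # (N,) mean attention received
    k = max(1, int(keep_ratio * tokens.shape[0]))
    keep = np.argsort(importance)[-k:]              # indices of the top-k tokens
    return tokens[keep], np.sort(keep)

rng = np.random.default_rng(0)
toks = rng.standard_normal((16, 8))
attn = rng.random((4, 16))
attn /= attn.sum(axis=1, keepdims=True)
kept, idx = select_reference_tokens(toks, attn)
print(kept.shape, idx)
```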
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00332">arXiv:2501.00332</a> <span> [<a href="https://arxiv.org/pdf/2501.00332">pdf</a>, <a href="https://arxiv.org/format/2501.00332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> MAIN-RAG: Multi-Agent Filtering Retrieval-Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chang%2C+C">Chia-Yuan Chang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhimeng Jiang</a>, <a href="/search/cs?searchtype=author&query=Rakesh%2C+V">Vineeth Rakesh</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+M">Menghai Pan</a>, <a href="/search/cs?searchtype=author&query=Yeh%2C+C+M">Chin-Chia Michael Yeh</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guanchu Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+M">Mingzhi Hu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhichao Xu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yan Zheng</a>, <a href="/search/cs?searchtype=author&query=Das%2C+M">Mahashweta Das</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+N">Na Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00332v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are becoming essential tools for various natural language processing tasks but often suffer from generating outdated or incorrect information. Retrieval-Augmented Generation (RAG) addresses this issue by incorporating external, real-time information retrieval to ground LLM responses. However, the existing RAG systems frequently struggle with the quality of retrieval do… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00332v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00332v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are becoming essential tools for various natural language processing tasks but often suffer from generating outdated or incorrect information. Retrieval-Augmented Generation (RAG) addresses this issue by incorporating external, real-time information retrieval to ground LLM responses. However, the existing RAG systems frequently struggle with the quality of retrieval documents, as irrelevant or noisy documents degrade performance, increase computational overhead, and undermine response reliability. To tackle this problem, we propose Multi-Agent Filtering Retrieval-Augmented Generation (MAIN-RAG), a training-free RAG framework that leverages multiple LLM agents to collaboratively filter and score retrieved documents. 
Specifically, MAIN-RAG introduces an adaptive filtering mechanism that dynamically adjusts the relevance filtering threshold based on score distributions, effectively minimizing noise while maintaining high recall of relevant documents. The proposed approach leverages inter-agent consensus to ensure robust document selection without requiring additional training data or fine-tuning. Experimental results across four QA benchmarks demonstrate that MAIN-RAG consistently outperforms traditional RAG approaches, achieving a 2-11% improvement in answer accuracy while reducing the number of irrelevant retrieved documents. Quantitative analysis further reveals that our approach achieves superior response consistency and answer accuracy over baseline methods, offering a competitive and practical alternative to training-based solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00332v1-abstract-full').style.display = 'none'; document.getElementById('2501.00332v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20799">arXiv:2412.20799</a> <span> [<a href="https://arxiv.org/pdf/2412.20799">pdf</a>, <a href="https://arxiv.org/format/2412.20799">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> SFE-Net: Harnessing Biological Principles of Differential Gene Expression for Improved Feature Selection in Deep Learning Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuqi Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yuanzhong Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yaoxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+J">Jianjun Yin</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+H">Haojun Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20799v1-abstract-short" style="display: inline;"> In the realm of DeepFake detection, the challenge of adapting to various synthesis methodologies such as Faceswap, Deepfakes, Face2Face, and NeuralTextures significantly impacts the performance of traditional machine learning models. These models often suffer from static feature representation, which struggles to perform consistently across diversely generated deepfake datasets. Inspired by the bi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20799v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20799v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20799v1-abstract-full" style="display: none;"> In the realm of DeepFake detection, the challenge of adapting to various synthesis methodologies such as Faceswap, Deepfakes, Face2Face, and NeuralTextures significantly impacts the performance of traditional machine learning models. 
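
MAIN-RAG's filter is said to adapt its relevance threshold to the score distribution, but the exact statistic is not stated in the abstract. The sketch below averages per-document scores across agents and keeps documents above a mean-minus-slack-times-std threshold; the statistic and the slack parameter are illustrative assumptions, not the paper's rule.

```python
import numpy as np

def filter_documents(agent_scores, slack=0.5):
    """Keep documents whose consensus score clears an adaptive threshold.

    agent_scores: (num_agents, num_docs) relevance scores from independent
                  LLM judges for one query.
    slack:        how far below the mean the threshold may sit, in units of
                  the consensus-score standard deviation.
    """
    consensus = agent_scores.mean(axis=0)                # inter-agent consensus
    threshold = consensus.mean() - slack * consensus.std()
    keep = np.where(consensus >= threshold)[0]
    # Return kept doc indices ordered by consensus score, highest first.
    return keep[np.argsort(consensus[keep])[::-1]], threshold

scores = np.array([[0.9, 0.2, 0.7, 0.1],
                   [0.8, 0.3, 0.6, 0.2],
                   [0.7, 0.1, 0.8, 0.1]])
kept, thr = filter_documents(scores)
print(kept, round(thr, 3))
```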

arXiv:2412.20799 [pdf, other] cs.MM
SFE-Net: Harnessing Biological Principles of Differential Gene Expression for Improved Feature Selection in Deep Learning Networks
Authors: Yuqi Li, Yuanzhong Zheng, Yaoxuan Wang, Jianjun Yin, Haojun Fei
Abstract: In the realm of DeepFake detection, the challenge of adapting to various synthesis methodologies such as Faceswap, Deepfakes, Face2Face, and NeuralTextures significantly impacts the performance of traditional machine learning models. These models often suffer from static feature representation, which struggles to perform consistently across diversely generated deepfake datasets. Inspired by the biological concept of differential gene expression, where gene activation is dynamically regulated in response to environmental stimuli, we introduce the Selective Feature Expression Network (SFE-Net). This framework integrates selective feature activation principles into deep learning architectures, allowing the model to dynamically adjust feature priorities in response to varying deepfake generation techniques. SFE-Net employs a novel mechanism that selectively enhances critical features essential for accurately detecting forgeries, while reducing the impact of irrelevant or misleading cues, akin to adaptive evolutionary processes in nature. Through rigorous testing on a range of deepfake datasets, SFE-Net not only surpasses existing static models in detecting sophisticated forgeries but also shows enhanced generalization capabilities in cross-dataset scenarios. Our approach significantly mitigates overfitting by maintaining a dynamic balance between feature exploration and exploitation, thus producing more robust and effective deepfake detection models. This bio-inspired strategy paves the way for developing adaptive deep learning systems that are finely tuned to the nuanced challenges posed by the varied nature of digital forgeries in modern digital forensics.
Submitted 30 December, 2024; originally announced December 2024.
Comments: 5 pages, 3 figures, 2 charts, conference

arXiv:2412.20412 [pdf, other] cs.CL, cs.AI, cs.LG
Multi-Objective Large Language Model Unlearning
Authors: Zibin Pan, Shuwen Zhang, Yuesheng Zheng, Chi Li, Yuheng Cheng, Junhua Zhao
Abstract: Machine unlearning in the domain of large language models (LLMs) has attracted great attention recently; it aims to effectively eliminate undesirable behaviors from LLMs without full retraining from scratch. In this paper, we explore the Gradient Ascent (GA) approach in LLM unlearning, which is a proactive way to decrease the prediction probability of the model on the target data in order to remove its influence. We analyze two challenges that render the process impractical: gradient explosion and catastrophic forgetting. To address these issues, we propose the Multi-Objective Large Language Model Unlearning (MOLLM) algorithm. We first formulate LLM unlearning as a multi-objective optimization problem, in which the cross-entropy loss is modified to an unlearning version to overcome the gradient explosion issue. A common descent update direction is then calculated, which enables the model to forget the target data while preserving the utility of the LLM. Our empirical results verify that MOLLM outperforms the SOTA GA-based LLM unlearning methods in terms of unlearning effect and model utility preservation. The source code is available at https://github.com/zibinpan/MOLLM.
Submitted 4 January, 2025; v1 submitted 29 December, 2024; originally announced December 2024.
Comments: To be published in the Proceedings of the 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP-2025)
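
The abstract says MOLLM computes a common descent update direction from the unlearning and utility-preserving objectives without giving the rule. As one standard way to realize such a direction for two objectives, the sketch below uses the min-norm (MGDA-style) convex combination of the two gradients; this illustrates the general idea and is not necessarily the paper's exact update.

```python
import numpy as np

def common_descent_direction(g_forget, g_retain):
    """Min-norm convex combination of two gradients (MGDA-style, illustrative).

    Returns d = a * g_forget + (1 - a) * g_retain, with a chosen so that -d is
    a descent direction for both objectives whenever such a direction exists.
    """
    diff = g_forget - g_retain
    denom = np.dot(diff, diff)
    if denom == 0.0:
        return g_forget
    a = np.clip(np.dot(g_retain - g_forget, g_retain) / denom, 0.0, 1.0)
    return a * g_forget + (1.0 - a) * g_retain

# Toy check: partially conflicting gradients still yield a direction with a
# non-negative inner product against both objectives' gradients.
g_f = np.array([1.0, 0.0])
g_r = np.array([0.3, 1.0])
d = common_descent_direction(g_f, g_r)
print(d, np.dot(d, g_f), np.dot(d, g_r))
```

The non-negative inner products printed in the toy check are exactly the property a common descent direction needs: a small step along -d does not increase either objective to first order.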

arXiv:2412.19437 [pdf, other] cs.CL, cs.AI
DeepSeek-V3 Technical Report
Authors: DeepSeek-AI, Aixin Liu, Bei Feng, Bing Xue, Bingxuan Wang, Bochao Wu, Chengda Lu, Chenggang Zhao, Chengqi Deng, Chenyu Zhang, Chong Ruan, Damai Dai, Daya Guo, Dejian Yang, Deli Chen, Dongjie Ji, Erhang Li, Fangyun Lin, Fucong Dai, Fuli Luo, Guangbo Hao, Guanting Chen, Guowei Li, H. Zhang, Han Bao, et al. (175 additional authors not shown)
Abstract: We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3.
Submitted 18 February, 2025; v1 submitted 26 December, 2024; originally announced December 2024.
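
The report's auxiliary-loss-free load balancing is, as commonly summarized, a matter of nudging per-expert bias terms added to the routing scores rather than adding a balance loss: biases of under-loaded experts drift up and those of over-loaded experts drift down, and the bias affects only expert selection. The numpy sketch below is a schematic of that idea under those assumptions; the step size, sign rule, and batch statistics are illustrative, not the report's exact procedure.

```python
import numpy as np

def route_with_bias(scores, bias, k=2):
    """Pick top-k experts per token using biased scores (bias affects selection only)."""
    biased = scores + bias
    return np.argsort(biased, axis=-1)[:, -k:]   # (tokens, k) chosen expert ids

def update_bias(bias, topk, num_experts, step=0.01):
    """Nudge under-loaded experts up and over-loaded experts down."""
    load = np.bincount(topk.ravel(), minlength=num_experts).astype(float)
    target = topk.size / num_experts             # perfectly balanced load
    return bias + step * np.sign(target - load)

rng = np.random.default_rng(0)
num_experts, tokens = 8, 512
bias = np.zeros(num_experts)
for _ in range(100):
    scores = rng.standard_normal((tokens, num_experts))
    topk = route_with_bias(scores, bias)
    bias = update_bias(bias, topk, num_experts)
print(np.round(bias, 3))
```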

arXiv:2412.18287 [pdf, other] cs.LG, cs.AI, cs.SI; DOI: 10.1609/aaai.v37i12.26702
Semi-supervised Credit Card Fraud Detection via Attribute-Driven Graph Representation
Authors: Sheng Xiang, Mingzhi Zhu, Dawei Cheng, Enxia Li, Ruihui Zhao, Yi Ouyang, Ling Chen, Yefeng Zheng
Abstract: Credit card fraud incurs a considerable cost for both cardholders and issuing banks. Contemporary methods apply machine learning-based classifiers to detect fraudulent behavior from labeled transaction records. However, labeled data are usually a small proportion of the billions of real transactions due to expensive labeling costs, which means such methods do not fully exploit the many natural features available in unlabeled data. Therefore, we propose a semi-supervised graph neural network for fraud detection. Specifically, we leverage transaction records to construct a temporal transaction graph, which is composed of temporal transactions (nodes) and interactions (edges) among them. Then we pass messages among the nodes through a Gated Temporal Attention Network (GTAN) to learn the transaction representations. We further model the fraud patterns through risk propagation among transactions. Extensive experiments are conducted on a real-world transaction dataset and two publicly available fraud detection datasets. The results show that our proposed method, GTAN, outperforms other state-of-the-art baselines on all three fraud detection datasets. Semi-supervised experiments demonstrate the excellent fraud detection performance of our model with only a tiny proportion of labeled data.
Submitted 24 December, 2024; originally announced December 2024.
Comments: 9 pages, 5 figures, AAAI 2023, code: https://github.com/AI4Risk/antifraud
Journal ref: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 37, No. 12, 2023
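
The abstract names a Gated Temporal Attention Network for message passing over the temporal transaction graph but does not spell out the layer. The sketch below shows a generic single-head attention aggregation over a transaction's neighbors, with a scalar sigmoid gate mixing the aggregated message into the node state; the gating form and parameter shapes are assumptions, not the GTAN layer itself.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gated_attention_aggregate(h_node, h_neighbors, w_gate):
    """One illustrative message-passing step for a transaction node.

    h_node:      (D,) current node embedding
    h_neighbors: (N, D) embeddings of neighboring transactions
    w_gate:      (2*D,) parameters of a scalar gate over [node, message]
    """
    # Scaled dot-product attention of the node over its neighbors.
    logits = h_neighbors @ h_node / np.sqrt(h_node.size)
    attn = np.exp(logits - logits.max())
    attn /= attn.sum()
    message = attn @ h_neighbors                     # (D,) aggregated message
    # Gate decides how much of the message to mix into the node state.
    g = sigmoid(w_gate @ np.concatenate([h_node, message]))
    return (1.0 - g) * h_node + g * message

rng = np.random.default_rng(0)
h = rng.standard_normal(16)
neigh = rng.standard_normal((5, 16))
print(gated_attention_aggregate(h, neigh, rng.standard_normal(32)).shape)
```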

arXiv:2412.17630 [pdf, other] cs.CV
Detail-Preserving Latent Diffusion for Stable Shadow Removal
Authors: Jiamin Xu, Yuxin Zheng, Zelong Li, Chi Wang, Renshu Gu, Weiwei Xu, Gang Xu
Abstract: Achieving high-quality shadow removal with strong generalizability is challenging in scenes with complex global illumination. Due to the limited diversity in shadow removal datasets, current methods are prone to overfitting training data, often leading to reduced performance on unseen cases. To address this, we leverage the rich visual priors of a pre-trained Stable Diffusion (SD) model and propose a two-stage fine-tuning pipeline to adapt the SD model for stable and efficient shadow removal. In the first stage, we fix the VAE and fine-tune the denoiser in latent space, which yields substantial shadow removal but may lose some high-frequency details. To resolve this, we introduce a second stage, called the detail injection stage. This stage selectively extracts features from the VAE encoder to modulate the decoder, injecting fine details into the final results. Experimental results show that our method outperforms state-of-the-art shadow removal techniques. The cross-dataset evaluation further demonstrates that our method generalizes effectively to unseen data, enhancing the applicability of shadow removal methods.
Submitted 23 December, 2024; originally announced December 2024.