Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 107 results for author: <span class="mathjax">Hou, S</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Hou%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Hou, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Hou%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Hou, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hou%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hou%2C+S&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hou%2C+S&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Hou%2C+S&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01699">arXiv:2502.01699</a> <span> [<a href="https://arxiv.org/pdf/2502.01699">pdf</a>, <a href="https://arxiv.org/format/2502.01699">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Inverse Attention Network with Intrinsic Discriminant Feature Exploitation for Fake News Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianlin Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+E">En Yu</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yi Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuai Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sujuan Hou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jiande Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01699v1-abstract-short" style="display: inline;"> Multimodal fake news detection has garnered significant attention due to its profound implications for social security. While existing approaches have contributed to understanding cross-modal consistency, they often fail to leverage modal-specific representations and explicit discrepant features. 
To address these limitations, we propose a Multimodal Inverse Attention Network (MIAN), a novel framew… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01699v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01699v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01699v1-abstract-full" style="display: none;"> Multimodal fake news detection has garnered significant attention due to its profound implications for social security. While existing approaches have contributed to understanding cross-modal consistency, they often fail to leverage modal-specific representations and explicit discrepant features. To address these limitations, we propose a Multimodal Inverse Attention Network (MIAN), a novel framework that explores intrinsic discriminative features based on news content to advance fake news detection. Specifically, MIAN introduces a hierarchical learning module that captures diverse intra-modal relationships through local-to-global and local-to-local interactions, thereby generating enhanced unimodal representations to improve the identification of fake news at the intra-modal level. Additionally, a cross-modal interaction module employs a co-attention mechanism to establish and model dependencies between the refined unimodal representations, facilitating seamless semantic integration across modalities. To explicitly extract inconsistency features, we propose an inverse attention mechanism that effectively highlights the conflicting patterns and semantic deviations introduced by fake news in both intra- and inter-modality. Extensive experiments on benchmark datasets demonstrate that MIAN significantly outperforms state-of-the-art methods, underscoring its pivotal contribution to advancing social security through enhanced multimodal fake news detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01699v1-abstract-full').style.display = 'none'; document.getElementById('2502.01699v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
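A minimal sketch of how an inverse-attention branch of this kind could be wired, assuming scaled dot-product co-attention between text and image tokens; the function name and the softmax over negated scores are illustrative assumptions, not the paper's exact formulation:

```python
import torch
import torch.nn.functional as F

def co_and_inverse_attention(text_feats, image_feats):
    # text_feats: (B, Nt, D), image_feats: (B, Ni, D)
    scores = torch.einsum("btd,bid->bti", text_feats, image_feats)
    scores = scores / text_feats.size(-1) ** 0.5   # scaled dot-product affinities
    attn = F.softmax(scores, dim=-1)               # co-attention: consistent regions
    inv_attn = F.softmax(-scores, dim=-1)          # assumed "inverse" branch:
                                                   # low-affinity pairs get high weight
    consistent = torch.einsum("bti,bid->btd", attn, image_feats)
    discrepant = torch.einsum("bti,bid->btd", inv_attn, image_feats)
    return consistent, discrepant                  # fused downstream by a classifier
```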
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01430">arXiv:2502.01430</a> <span> [<a href="https://arxiv.org/pdf/2502.01430">pdf</a>, <a href="https://arxiv.org/format/2502.01430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Molecular Odor Prediction Based on Multi-Feature Graph Attention Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+H">HongXin Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">JianDe Sun</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yi Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuai Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sujuan Hou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">YuLong Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01430v1-abstract-short" style="display: inline;"> Olfactory perception plays a critical role in both human and organismal interactions, yet understanding of its underlying mechanisms and influencing factors remain insufficient. Molecular structures influence odor perception through intricate biochemical interactions, and accurately quantifying structure-odor relationships presents significant challenges. The Quantitative Structure-Odor Relationsh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01430v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01430v1-abstract-full" style="display: none;"> Olfactory perception plays a critical role in both human and organismal interactions, yet understanding of its underlying mechanisms and influencing factors remain insufficient. Molecular structures influence odor perception through intricate biochemical interactions, and accurately quantifying structure-odor relationships presents significant challenges. The Quantitative Structure-Odor Relationship (QSOR) task, which involves predicting the associations between molecular structures and their corresponding odors, seeks to address these challenges. To this end, we propose a method for QSOR, utilizing Graph Attention Networks to model molecular structures and capture both local and global features. Unlike conventional QSOR approaches reliant on predefined descriptors, our method leverages diverse molecular feature extraction techniques to automatically learn comprehensive representations. This integration enhances the model's capacity to handle complex molecular information, improves prediction accuracy. Our approach demonstrates clear advantages in QSOR prediction tasks, offering valuable insights into the application of deep learning in cheminformatics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01430v1-abstract-full').style.display = 'none'; document.getElementById('2502.01430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01296">arXiv:2502.01296</a> <span> [<a href="https://arxiv.org/pdf/2502.01296">pdf</a>, <a href="https://arxiv.org/format/2502.01296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Molecular Odor Prediction with Harmonic Modulated Feature Mapping and Chemically-Informed Loss </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+H">HongXin Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">JianDe Sun</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yi Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuai Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sujuan Hou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">YuLong Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuxiang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01296v1-abstract-short" style="display: inline;"> Molecular odor prediction has great potential across diverse fields such as chemistry, pharmaceuticals, and environmental science, enabling the rapid design of new materials and enhancing environmental monitoring. However, current methods face two main challenges: First, existing models struggle with non-smooth objective functions and the complexity of mixed feature dimensions; Second, datasets su… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01296v1-abstract-full" style="display: none;"> Molecular odor prediction has great potential across diverse fields such as chemistry, pharmaceuticals, and environmental science, enabling the rapid design of new materials and enhancing environmental monitoring. However, current methods face two main challenges: First, existing models struggle with non-smooth objective functions and the complexity of mixed feature dimensions; Second, datasets suffer from severe label imbalance, which hampers model training, particularly in learning minority class labels. To address these issues, we introduce a novel feature mapping method and a molecular ensemble optimization loss function. By incorporating feature importance learning and frequency modulation, our model adaptively adjusts the contribution of each feature, efficiently capturing the intricate relationship between molecular structures and odor descriptors. 
4. arXiv:2502.01206 [pdf, other] (cs.PF)
PerfSeer: An Efficient and Accurate Deep Learning Models Performance Predictor
Authors: Xinlong Zhao, Jiande Sun, Jia Zhang, Sujuan Hou, Shuai Li, Tong Liu, Ke Liu
Abstract: Predicting the performance of deep learning (DL) models, such as execution time and resource utilization, is crucial for Neural Architecture Search (NAS), DL cluster schedulers, and other technologies that advance deep learning. The representation of a model is the foundation for its performance prediction. However, existing methods cannot comprehensively represent diverse model configurations, resulting in unsatisfactory accuracy. To address this, we represent a model as a graph that includes the topology, along with the node, edge, and global features, all of which are crucial for effectively capturing the performance of the model. Based on this representation, we propose PerfSeer, a novel predictor that uses a Graph Neural Network (GNN)-based performance prediction model, SeerNet. SeerNet fully leverages the topology and various features, while incorporating optimizations such as Synergistic Max-Mean aggregation (SynMM) and Global-Node Perspective Boost (GNPB) to capture the critical performance information more effectively, enabling it to predict the performance of models accurately. Furthermore, SeerNet can be extended to SeerNet-Multi by using Project Conflicting Gradients (PCGrad), enabling efficient simultaneous prediction of multiple performance metrics without significantly affecting accuracy. We constructed a dataset containing performance metrics for 53k+ model configurations, including execution time, memory usage, and Streaming Multiprocessor (SM) utilization during both training and inference. The evaluation results show that PerfSeer outperforms nn-Meter, Brp-NAS, and DIPPM.
Submitted 3 February, 2025; originally announced February 2025.
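A toy illustration of the representation the abstract describes, a graph carrying topology plus node, edge, and global features; all field names and numbers below are hypothetical:

```python
from dataclasses import dataclass

@dataclass
class ModelGraph:
    nodes: list         # per-operator features, e.g. op type, FLOPs, parameter count
    edges: list         # (src, dst, tensor_shape): the dataflow topology
    global_feats: dict  # whole-model context, e.g. batch size, target device

g = ModelGraph(
    nodes=[{"op": "conv3x3", "flops": 1.2e8, "params": 1792},  # hypothetical numbers
           {"op": "relu", "flops": 8.0e5, "params": 0}],
    edges=[(0, 1, (1, 64, 112, 112))],
    global_feats={"batch_size": 1, "device": "V100"},
)
```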
5. arXiv:2501.11007 [pdf, other] (cs.CV, cs.LG)
HFGCN: Hypergraph Fusion Graph Convolutional Networks for Skeleton-Based Action Recognition
Authors: Pengcheng Dong, Wenbo Wan, Huaxiang Zhang, Shuai Li, Sujuan Hou, Jiande Sun
Abstract: In recent years, action recognition has received much attention and wide application due to its important role in video understanding. Most research on action recognition has focused on improving performance via various deep learning methods rather than on the classification of skeleton points. The topological modeling between skeleton points and body parts has seldom been considered. Although some studies have used a data-driven approach to classify the topology of skeleton points, the kinematic nature of the skeleton points has not been taken into consideration. Therefore, in this paper, we draw on the theory of kinematics to adapt the topological relations of the skeleton points and propose a topological relation classification based on body parts and distance from the core of the body. To synthesize these topological relations for action recognition, we propose a novel Hypergraph Fusion Graph Convolutional Network (HFGCN). In particular, the proposed model is able to focus on the human skeleton points and the different body parts simultaneously, and thus construct the topology, which noticeably improves recognition accuracy. We use a hypergraph to represent the categorical relationships of these skeleton points and incorporate the hypergraph into a graph convolutional network to model the higher-order relationships among the skeleton points and enhance the feature representation of the network. In addition, our proposed hypergraph attention module and hypergraph graph convolution module optimize topology modeling in the temporal and channel dimensions, respectively, to further enhance the feature representation of the network. We conducted extensive experiments on three widely used datasets. The results validate that our proposed method achieves the best performance when compared with the state-of-the-art skeleton-based methods.
Submitted 2 February, 2025; v1 submitted 19 January, 2025; originally announced January 2025.
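For background: the standard hypergraph convolution (the HGNN operator of Feng et al., 2019) that hypergraph-based skeleton models typically build on; the abstract does not give HFGCN's exact fusion operator, so treat this as the generic form:

\[
X^{(l+1)} = \sigma\!\left(D_v^{-1/2}\, H\, W\, D_e^{-1}\, H^{\top}\, D_v^{-1/2}\, X^{(l)}\, \Theta^{(l)}\right),
\]

where \(H\) is the incidence matrix between skeleton joints (vertices) and body-part groups (hyperedges), \(W\) holds the hyperedge weights, \(D_v\) and \(D_e\) are the vertex and hyperedge degree matrices, and \(\Theta^{(l)}\) is the learnable weight matrix.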
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03336">arXiv:2501.03336</a> <span> [<a href="https://arxiv.org/pdf/2501.03336">pdf</a>, <a href="https://arxiv.org/format/2501.03336">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mobile Augmented Reality Framework with Fusional Localization and Pose Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Songlin Hou</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fangzhou Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yunmei Huang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zhe Peng</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+B">Bin Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03336v1-abstract-short" style="display: inline;"> As a novel way of presenting information, augmented reality (AR) enables people to interact with the physical world in a direct and intuitive way. While there are some mobile AR products implemented with specific hardware at a high cost, the software approaches of AR implementation on mobile platforms(such as smartphones, tablet PC, etc.) are still far from practical use. GPS-based mobile AR syste… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03336v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03336v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03336v1-abstract-full" style="display: none;"> As a novel way of presenting information, augmented reality (AR) enables people to interact with the physical world in a direct and intuitive way. While there are some mobile AR products implemented with specific hardware at a high cost, the software approaches of AR implementation on mobile platforms(such as smartphones, tablet PC, etc.) are still far from practical use. GPS-based mobile AR systems usually perform poorly due to the inaccurate positioning in the indoor environment. Previous vision-based pose estimation methods need to continuously track predefined markers within a short distance, which greatly degrade user experience. This paper first conducts a comprehensive study of the state-of-the-art AR and localization systems on mobile platforms. Then, we propose an effective indoor mobile AR framework. In the framework, a fusional localization method and a new pose estimation implementation are developed to increase the overall matching rate and thus improving AR display accuracy. Experiments show that our framework has higher performance than approaches purely based on images or Wi-Fi signals. We achieve low average error distances (0.61-0.81m) and accurate matching rates (77%-82%) when the average sampling grid length is set to 0.5m. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03336v1-abstract-full').style.display = 'none'; document.getElementById('2501.03336v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figues</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17951">arXiv:2412.17951</a> <span> [<a href="https://arxiv.org/pdf/2412.17951">pdf</a>, <a href="https://arxiv.org/format/2412.17951">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hyperbolic Chamfer Distance for Point Cloud Completion and Beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fangzhou Lin</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Songlin Hou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haotian Liu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shang Gao</a>, <a href="/search/cs?searchtype=author&query=Yamada%2C+K+D">Kazunori D Yamada</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H+K">Haichong K. Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Ziming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17951v1-abstract-short" style="display: inline;"> Chamfer Distance (CD) is widely used as a metric to quantify difference between two point clouds. In point cloud completion, Chamfer Distance (CD) is typically used as a loss function in deep learning frameworks. However, it is generally acknowledged within the field that Chamfer Distance (CD) is vulnerable to the presence of outliers, which can consequently lead to the convergence on suboptimal m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17951v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17951v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17951v1-abstract-full" style="display: none;"> Chamfer Distance (CD) is widely used as a metric to quantify difference between two point clouds. In point cloud completion, Chamfer Distance (CD) is typically used as a loss function in deep learning frameworks. However, it is generally acknowledged within the field that Chamfer Distance (CD) is vulnerable to the presence of outliers, which can consequently lead to the convergence on suboptimal models. In divergence from the existing literature, which largely concentrates on resolving such concerns in the realm of Euclidean space, we put forth a notably uncomplicated yet potent metric specifically designed for point cloud completion tasks: {Hyperbolic Chamfer Distance (HyperCD)}. This metric conducts Chamfer Distance computations within the parameters of hyperbolic space. 
8. arXiv:2412.05587 [pdf] (cs.SE, cs.AI, cs.DB)
GEE-OPs: An Operator Knowledge Base for Geospatial Code Generation on the Google Earth Engine Platform Powered by Large Language Models
Authors: Shuyang Hou, Jianyuan Liang, Anqi Zhao, Huayi Wu
Abstract: As the scale and complexity of spatiotemporal data continue to grow rapidly, the use of geospatial modeling on the Google Earth Engine (GEE) platform presents dual challenges: improving the coding efficiency of domain experts and enhancing the coding capabilities of interdisciplinary users. To address these challenges and improve the performance of large language models (LLMs) in geospatial code generation tasks, we propose a framework for building a geospatial operator knowledge base tailored to the GEE JavaScript API. This framework consists of an operator syntax knowledge table, an operator relationship frequency table, an operator frequent pattern knowledge table, and an operator relationship chain knowledge table. By leveraging Abstract Syntax Tree (AST) techniques and frequent itemset mining, we systematically extract operator knowledge from 185,236 real GEE scripts and syntax documentation, forming a structured knowledge base. Experimental results demonstrate that the framework achieves over 90% accuracy, recall, and F1 score in operator knowledge extraction. When integrated with the Retrieval-Augmented Generation (RAG) strategy for LLM-based geospatial code generation tasks, the knowledge base improves performance by 20-30%. Ablation studies further quantify the necessity of each knowledge table in the knowledge base construction. This work provides robust support for the advancement and application of geospatial code modeling techniques, offering an innovative approach to constructing domain-specific knowledge bases that enhance the code generation capabilities of LLMs, and fostering the deeper integration of generative AI technologies within the field of geoinformatics.
Submitted 11 December, 2024; v1 submitted 7 December, 2024; originally announced December 2024.
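A crude sketch of the operator relationship frequency table idea, counting adjacent operators in GEE call chains; the paper extracts chains via AST parsing, a regex stands in for that here, and the demo script is invented:

```python
import re
from collections import Counter
from itertools import pairwise  # Python 3.10+

def operator_chain(script: str) -> list[str]:
    # pull chained operator names such as .filterDate(...).median() out of a GEE JS script
    return re.findall(r"\.(\w+)\(", script)

def pair_frequencies(scripts: list[str]) -> Counter:
    counts = Counter()
    for s in scripts:
        counts.update(pairwise(operator_chain(s)))  # adjacent-operator pairs
    return counts

demo = 'ee.ImageCollection("L8").filterDate("2020", "2021").median().clip(roi);'
print(pair_frequencies([demo]))
# Counter({('ImageCollection', 'filterDate'): 1, ('filterDate', 'median'): 1, ('median', 'clip'): 1})
```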
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01663">arXiv:2412.01663</a> <span> [<a href="https://arxiv.org/pdf/2412.01663">pdf</a>, <a href="https://arxiv.org/format/2412.01663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DaDu-E: Rethinking the Role of Large Language Model in Robotic Computing Pipeline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wenhao Sun</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sai Hou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zixuan Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bo Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaoshan Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xu Yang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+S">Shuai Liang</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yiming Gan</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yinhe Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01663v1-abstract-short" style="display: inline;"> Performing complex tasks in open environments remains challenging for robots, even when using large language models (LLMs) as the core planner. Many LLM-based planners are inefficient due to their large number of parameters and prone to inaccuracies because they operate in open-loop systems. We think the reason is that only applying LLMs as planners is insufficient. In this work, we propose DaDu-E… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01663v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01663v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01663v1-abstract-full" style="display: none;"> Performing complex tasks in open environments remains challenging for robots, even when using large language models (LLMs) as the core planner. Many LLM-based planners are inefficient due to their large number of parameters and prone to inaccuracies because they operate in open-loop systems. We think the reason is that only applying LLMs as planners is insufficient. In this work, we propose DaDu-E, a robust closed-loop planning framework for embodied AI robots. Specifically, DaDu-E is equipped with a relatively lightweight LLM, a set of encapsulated robot skill instructions, a robust feedback system, and memory augmentation. Together, these components enable DaDu-E to (i) actively perceive and adapt to dynamic environments, (ii) optimize computational costs while maintaining high performance, and (iii) recover from execution failures using its memory and feedback mechanisms. Extensive experiments on real-world and simulated tasks show that DaDu-E achieves task success rates comparable to embodied AI robots with larger models as planners like COME-Robot, while reducing computational requirements by $6.6 \times$. Users are encouraged to explore our system at: \url{https://rlc-lab.github.io/dadu-e/}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01663v1-abstract-full').style.display = 'none'; document.getElementById('2412.01663v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 5 figures, submitted to JFR</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16154">arXiv:2411.16154</a> <span> [<a href="https://arxiv.org/pdf/2411.16154">pdf</a>, <a href="https://arxiv.org/format/2411.16154">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sizai Hou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Songze Li</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+D">Duanyi Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16154v1-abstract-short" style="display: inline;"> Self-supervised learning (SSL) is pervasively exploited in training high-quality upstream encoders with a large amount of unlabeled data. However, it is found to be susceptible to backdoor attacks merely via polluting a small portion of training data. The victim encoders mismatch triggered inputs with target embeddings, e.g., match the triggered cat input to an airplane embedding, such that the do… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16154v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16154v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16154v1-abstract-full" style="display: none;"> Self-supervised learning (SSL) is pervasively exploited in training high-quality upstream encoders with a large amount of unlabeled data. However, it is found to be susceptible to backdoor attacks merely via polluting a small portion of training data. The victim encoders mismatch triggered inputs with target embeddings, e.g., match the triggered cat input to an airplane embedding, such that the downstream tasks are affected to misbehave when the trigger is activated. Emerging backdoor attacks have shown great threats in different SSL paradigms such as contrastive learning and CLIP, while few research is devoted to defending against such attacks. Besides, the existing ones fall short in detecting advanced stealthy backdoors. To address the limitations, we propose a novel detection mechanism, DeDe, which detects the activation of the backdoor mapping with the cooccurrence of victim encoder and trigger inputs. 
11. arXiv:2411.10753 [pdf] (cs.SE, cs.AI, cs.CL)
Chain-of-Programming (CoP): Empowering Large Language Models for Geospatial Code Generation
Authors: Shuyang Hou, Haoyue Jiao, Zhangxiao Shen, Jianyuan Liang, Anqi Zhao, Xiaopu Zhang, Jianxun Wang, Huayi Wu
Abstract: With the rapid growth of interdisciplinary demands for geospatial modeling and the rise of large language models (LLMs), geospatial code generation technology has seen significant advancements. However, existing LLMs often face challenges in the geospatial code generation process due to incomplete or unclear user requirements and insufficient knowledge of specific platform syntax rules, leading to the generation of non-executable code, a phenomenon known as "code hallucination." To address this issue, this paper proposes a Chain of Programming (CoP) framework, which decomposes the code generation process into five steps: requirement analysis, algorithm design, code implementation, code debugging, and code annotation. The framework incorporates a shared information pool, knowledge base retrieval, and user feedback mechanisms, forming an end-to-end code generation flow from requirements to code without the need for model fine-tuning. Based on a geospatial problem classification framework and evaluation benchmarks, the CoP strategy significantly improves the logical clarity, syntactical correctness, and executability of the generated code, with improvements ranging from 3.0% to 48.8%. Comparative and ablation experiments further validate the superiority of the CoP strategy over other optimization approaches and confirm the rationality and necessity of its key components. Through case studies on building data visualization and fire data analysis, this paper demonstrates the application and effectiveness of CoP in various geospatial scenarios. The CoP framework offers a systematic, step-by-step approach to LLM-based geospatial code generation tasks, significantly enhancing code generation performance in geospatial tasks and providing valuable insights for code generation in other vertical domains.
Submitted 16 November, 2024; originally announced November 2024.
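A schematic of the five-step chain with a shared information pool, following the decomposition the abstract lists; `llm` is a stand-in for any chat-completion call, and the prompt format is invented:

```python
STEPS = ["requirement analysis", "algorithm design",
         "code implementation", "code debugging", "code annotation"]

def chain_of_programming(task: str, llm) -> dict:
    pool = {"task": task}              # shared information pool
    for step in STEPS:
        prompt = f"Step: {step}\nContext so far: {pool}"
        pool[step] = llm(prompt)       # each step reads every earlier output
    return pool                        # pool["code annotation"] holds the final code
```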
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08800">arXiv:2411.08800</a> <span> [<a href="https://arxiv.org/pdf/2411.08800">pdf</a>, <a href="https://arxiv.org/format/2411.08800">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> <span class="tag is-small" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small" data-tooltip="Machine Learning">cs.LG</span> </div> </div>
<p class="title is-5 mathjax"> Deep Learning Accelerated Quantum Transport Simulations in Nanoelectronics: From Break Junctions to Field-Effect Transistors </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+J">Jijie Zou</a>, <a href="/search/cs?searchtype=author&query=Zhouyin%2C+Z">Zhanghao Zhouyin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+D">Dongying Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shimin Hou</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Q">Qiangqiang Gu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Quantum transport calculations are essential for understanding and designing nanoelectronic devices, yet the trade-off between accuracy and computational efficiency has long limited their practical applications. We present a general framework that combines the deep learning tight-binding Hamiltonian (DeePTB) approach with the non-equilibrium Green's function (NEGF) method, enabling efficient quantum transport calculations while maintaining first-principles accuracy. We demonstrate the capabilities of the DeePTB-NEGF framework through two representative applications: comprehensive simulation of break junction systems, where conductance histograms show good agreement with experimental measurements in both metallic contact and single-molecule junction cases; and simulation of carbon nanotube field effect transistors through self-consistent NEGF-Poisson calculations, capturing essential physics including the electrostatic potential and transfer characteristic curves under finite bias conditions. This framework bridges the gap between first-principles accuracy and computational efficiency, providing a powerful tool for high-throughput quantum transport simulations across different scales in nanoelectronics. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures</span> </p>
</li>
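<p class="is-size-7">For readers unfamiliar with the NEGF machinery the abstract leans on, here is a toy Landauer transmission calculation for a 1D tight-binding chain with wide-band leads. The chain and coupling values are illustrative; DeePTB-NEGF supplies ab-initio-quality Hamiltonians in place of this hand-made one:</p>
<pre><code># Toy NEGF transmission: T(E) = Tr[Gamma_L G Gamma_R G^dagger], with the
# retarded Green's function G = (E - H - Sigma)^-1 and wide-band lead
# self-energies Sigma = -i(Gamma_L + Gamma_R)/2.
import numpy as np

def transmission(H, gamma=0.5, energies=np.linspace(-3, 3, 7)):
    n = H.shape[0]
    Gam_L = np.zeros((n, n), complex); Gam_L[0, 0] = gamma      # left-lead coupling
    Gam_R = np.zeros((n, n), complex); Gam_R[-1, -1] = gamma    # right-lead coupling
    Sig = -0.5j * (Gam_L + Gam_R)                               # wide-band self-energy
    out = []
    for E in energies:
        G = np.linalg.inv(E * np.eye(n) - H - Sig)              # retarded Green's function
        out.append(np.trace(Gam_L @ G @ Gam_R @ G.conj().T).real)
    return np.array(out)

# three-site chain, nearest-neighbor hopping t = -1
H = np.diag(np.full(2, -1.0), 1) + np.diag(np.full(2, -1.0), -1)
H = H + np.zeros((3, 3))
print(transmission(H))
</code></pre>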
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08800v2-abstract-full').style.display = 'none'; document.getElementById('2411.08800v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02947">arXiv:2411.02947</a> <span> [<a href="https://arxiv.org/pdf/2411.02947">pdf</a>, <a href="https://arxiv.org/format/2411.02947">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> </div> </div> <p class="title is-5 mathjax"> Time-Causal VAE: Robust Financial Time Series Generator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Acciaio%2C+B">Beatrice Acciaio</a>, <a href="/search/cs?searchtype=author&query=Eckstein%2C+S">Stephan Eckstein</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Songyan Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02947v1-abstract-short" style="display: inline;"> We build a time-causal variational autoencoder (TC-VAE) for robust generation of financial time series data. Our approach imposes a causality constraint on the encoder and decoder networks, ensuring a causal transport from the real market time series to the fake generated time series. Specifically, we prove that the TC-VAE loss provides an upper bound on the causal Wasserstein distance between mar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02947v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02947v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02947v1-abstract-full" style="display: none;"> We build a time-causal variational autoencoder (TC-VAE) for robust generation of financial time series data. Our approach imposes a causality constraint on the encoder and decoder networks, ensuring a causal transport from the real market time series to the fake generated time series. Specifically, we prove that the TC-VAE loss provides an upper bound on the causal Wasserstein distance between market distributions and generated distributions. Consequently, the TC-VAE loss controls the discrepancy between optimal values of various dynamic stochastic optimization problems under real and generated distributions. To further enhance the model's ability to approximate the latent representation of the real market distribution, we integrate a RealNVP prior into the TC-VAE framework. 
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00985">arXiv:2411.00985</a> <span> [<a href="https://arxiv.org/pdf/2411.00985">pdf</a>, <a href="https://arxiv.org/format/2411.00985">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Computation and Language">cs.CL</span> </div> </div>
<p class="title is-5 mathjax"> FedDTPT: Federated Discrete and Transferable Prompt Tuning for Black-Box Large Language Models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiaqi Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Simin Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuzhe Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yijiang Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shiyue Hou</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+R">Rui Jing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zehua Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zijian Tian</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: In recent years, large language models (LLMs) have significantly advanced the field of natural language processing (NLP). By fine-tuning LLMs with data from specific scenarios, these foundation models can better adapt to various downstream tasks. However, the fine-tuning process poses privacy leakage risks, particularly in centralized data processing scenarios. To address user privacy concerns, federated learning (FL) has been introduced to mitigate the risks associated with centralized data collection from multiple sources. Nevertheless, the privacy of LLMs themselves is equally critical, as potential malicious attacks challenge their security, an issue that has received limited attention in current research. Consequently, establishing a trusted multi-party model fine-tuning environment is essential. Additionally, the local deployment of large LLMs incurs significant storage costs and high computational demands. To address these challenges, we propose FedDTPT, the first federated discrete and transferable prompt tuning method for black-box large language models. In the client optimization phase, we adopt a token-level discrete prompt optimization method that leverages a feedback loop based on prediction accuracy to drive gradient-free prompt optimization through the MLM API. For server optimization, we employ an attention mechanism based on semantic similarity to filter all local prompt tokens, along with an embedding-distance elbow detection and DBSCAN clustering strategy to enhance the filtering process. Experimental results demonstrate that, compared to state-of-the-art methods, our approach achieves higher accuracy, reduced communication overhead, and robustness to non-IID data in a black-box setting. Moreover, the optimized prompts are transferable. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
</li>
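<p class="is-size-7">A gradient-free, token-level prompt search of the kind the client step describes can be as simple as hill climbing on an accuracy signal. The vocabulary, mutation rule, and scoring function below are toy assumptions, not FedDTPT's actual procedure:</p>
<pre><code># Discrete prompt optimization without gradients: mutate one token at a time
# and keep the change only when a black-box accuracy signal does not degrade.
import random

VOCAB = ["classify", "sentiment", "review", "answer", "label", "carefully"]

def optimize_prompt(accuracy_fn, length=4, rounds=50, seed=0):
    rng = random.Random(seed)
    prompt = [rng.choice(VOCAB) for _ in range(length)]
    best = accuracy_fn(prompt)
    for _ in range(rounds):
        cand = list(prompt)
        cand[rng.randrange(length)] = rng.choice(VOCAB)   # mutate one token
        score = accuracy_fn(cand)                         # feedback loop, no gradients
        if score >= best:
            prompt, best = cand, score
    return prompt, best

# toy black-box signal standing in for held-out prediction accuracy
demo = lambda p: ("classify" in p) + ("carefully" in p)
print(optimize_prompt(demo))
</code></pre>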
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20975">arXiv:2410.20975</a> <span> [<a href="https://arxiv.org/pdf/2410.20975">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small" data-tooltip="Databases">cs.DB</span> </div> </div>
<p class="title is-5 mathjax"> Geo-FuB: A Method for Constructing an Operator-Function Knowledge Base for Geospatial Code Generation Tasks Using Large Language Models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuyang Hou</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+A">Anqi Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jianyuan Liang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhangxiao Shen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Huayi Wu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: The rise of spatiotemporal data and the need for efficient geospatial modeling have spurred interest in automating these tasks with large language models (LLMs). However, general LLMs often generate errors in geospatial code due to a lack of domain-specific knowledge on functions and operators. To address this, a retrieval-augmented generation (RAG) approach, utilizing an external knowledge base of geospatial functions and operators, is proposed. This study introduces a framework to construct such a knowledge base, leveraging geospatial script semantics. The framework includes: Function Semantic Framework Construction (Geo-FuSE), Frequent Operator Combination Statistics (Geo-FuST), and Semantic Mapping (Geo-FuM). Techniques like Chain-of-Thought, TF-IDF, and the APRIORI algorithm are utilized to derive and align geospatial functions. An example knowledge base, Geo-FuB, built from 154,075 Google Earth Engine scripts, is available on GitHub. Evaluation shows an overall accuracy of 88.89%, with structural and semantic accuracies of 92.03% and 86.79%, respectively. Geo-FuB's potential to optimize geospatial code generation through the RAG and fine-tuning paradigms is highlighted. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
</li>
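<p class="is-size-7">To make the "frequent operator combination" step concrete, here is a small support-counting sketch over toy scripts; it shows pairwise co-occurrence counting rather than the full APRIORI algorithm, and the operator names are merely illustrative Earth Engine methods:</p>
<pre><code># Mining frequently co-occurring operators from a corpus of scripts, the kind
# of statistic Geo-FuST derives. Each "script" is reduced to its set of
# operator calls; pairs above a support threshold are kept.
from itertools import combinations
from collections import Counter

scripts = [
    {"filterDate", "filterBounds", "median"},
    {"filterDate", "filterBounds", "mosaic"},
    {"filterDate", "median"},
]

def frequent_pairs(transactions, min_support=2):
    counts = Counter()
    for ops in transactions:
        counts.update(combinations(sorted(ops), 2))
    return {pair: c for pair, c in counts.items() if c >= min_support}

print(frequent_pairs(scripts))
# e.g. ('filterBounds', 'filterDate'): 2 and ('filterDate', 'median'): 2
</code></pre>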
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20975v1-abstract-full').style.display = 'none'; document.getElementById('2410.20975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17031">arXiv:2410.17031</a> <span> [<a href="https://arxiv.org/pdf/2410.17031">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GeoCode-GPT: A Large Language Model for Geospatial Code Generation Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuyang Hou</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhangxiao Shen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+A">Anqi Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jianyuan Liang</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+Z">Zhipeng Gui</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+X">Xuefeng Guan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Huayi Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17031v2-abstract-short" style="display: inline;"> The increasing demand for spatiotemporal data and modeling tasks in geosciences has made geospatial code generation technology a critical factor in enhancing productivity. Although large language models (LLMs) have demonstrated potential in code generation tasks, they often encounter issues such as refusal to code or hallucination in geospatial code generation due to a lack of domain-specific know… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17031v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17031v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17031v2-abstract-full" style="display: none;"> The increasing demand for spatiotemporal data and modeling tasks in geosciences has made geospatial code generation technology a critical factor in enhancing productivity. Although large language models (LLMs) have demonstrated potential in code generation tasks, they often encounter issues such as refusal to code or hallucination in geospatial code generation due to a lack of domain-specific knowledge and code corpora. To address these challenges, this paper presents and open-sources the GeoCode-PT and GeoCode-SFT corpora, along with the GeoCode-Eval evaluation dataset. Additionally, by leveraging QLoRA and LoRA for pretraining and fine-tuning, we introduce GeoCode-GPT-7B, the first LLM focused on geospatial code generation, fine-tuned from Code Llama-7B. 
Furthermore, we establish a comprehensive geospatial code evaluation framework, incorporating option matching, expert validation, and prompt engineering scoring for LLMs, and systematically evaluate GeoCode-GPT-7B using the GeoCode-Eval dataset. Experimental results show that GeoCode-GPT outperforms other models in multiple-choice accuracy by 9.1% to 32.1%, in code summarization ability by 1.7% to 25.4%, and in code generation capability by 1.2% to 25.1%. This paper provides a solution and empirical validation for enhancing LLMs' performance in geospatial code generation, extends the boundaries of domain-specific model applications, and offers valuable insights into unlocking their potential in geospatial code generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17031v2-abstract-full').style.display = 'none'; document.getElementById('2410.17031v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09738">arXiv:2410.09738</a> <span> [<a href="https://arxiv.org/pdf/2410.09738">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Generate Geospatial Code? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuyang Hou</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhangxiao Shen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jianyuan Liang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+A">Anqi Zhao</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+Z">Zhipeng Gui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Huayi Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09738v2-abstract-short" style="display: inline;"> With the growing demand for spatiotemporal data processing and geospatial modeling, automating geospatial code generation has become essential for productivity. Large language models (LLMs) show promise in code generation but face challenges like domain-specific knowledge gaps and "coding hallucinations." This paper introduces GeoCode-Eval (GCE), a framework for assessing LLMs' ability to generate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09738v2-abstract-full').style.display = 'inline'; document.getElementById('2410.09738v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09738v2-abstract-full" style="display: none;"> With the growing demand for spatiotemporal data processing and geospatial modeling, automating geospatial code generation has become essential for productivity. 
Large language models (LLMs) show promise in code generation but face challenges like domain-specific knowledge gaps and "coding hallucinations." This paper introduces GeoCode-Eval (GCE), a framework for assessing LLMs' ability to generate geospatial code across three dimensions: "Cognition and Memory," "Comprehension and Interpretation," and "Innovation and Creation," distributed across eight capability levels. We developed a benchmark dataset, GeoCode-Bench, consisting of 5,000 multiple-choice, 1,500 fill-in-the-blank, 1,500 true/false questions, and 1,000 subjective tasks covering code summarization, generation, completion, and correction. Using GeoCode-Bench, we evaluated three commercial closed-source LLMs, four open-source general-purpose LLMs, and 14 specialized code generation models. We also conducted experiments on few-shot and zero-shot learning, Chain of Thought reasoning, and multi-round majority voting to measure their impact on geospatial code generation. Additionally, we fine-tuned the Code LLaMA-7B model using Google Earth Engine-related JavaScript, creating GEECode-GPT, and evaluated it on subjective tasks. Results show that constructing pre-training and instruction datasets significantly improves code generation, offering insights for optimizing LLMs in specific domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09738v2-abstract-full').style.display = 'none'; document.getElementById('2410.09738v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07588">arXiv:2410.07588</a> <span> [<a href="https://arxiv.org/pdf/2410.07588">pdf</a>, <a href="https://arxiv.org/format/2410.07588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Careful About What App Promotion Ads Recommend! Detecting and Explaining Malware Promotion via App Promotion Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shang Ma</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chaoran Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shao Yang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shifu Hou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T+J">Toby Jia-Jun Li</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xusheng Xiao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yanfang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07588v2-abstract-short" style="display: inline;"> In Android apps, their developers frequently place app promotion ads, namely advertisements to promote other apps. 
Unfortunately, the inadequate vetting of ad content allows malicious developers to exploit app promotion ads as a new distribution channel for malware. To help detect malware distributed via app promotion ads, in this paper, we propose a novel approach, named ADGPE, that synergistical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07588v2-abstract-full').style.display = 'inline'; document.getElementById('2410.07588v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07588v2-abstract-full" style="display: none;"> In Android apps, their developers frequently place app promotion ads, namely advertisements to promote other apps. Unfortunately, the inadequate vetting of ad content allows malicious developers to exploit app promotion ads as a new distribution channel for malware. To help detect malware distributed via app promotion ads, in this paper, we propose a novel approach, named ADGPE, that synergistically integrates app user interface (UI) exploration with graph learning to automatically collect app promotion ads, detect malware promoted by these ads, and explain the promotion mechanisms employed by the detected malware. Our evaluation on 18, 627 app promotion ads demonstrates the substantial risks in the app promotion ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07588v2-abstract-full').style.display = 'none'; document.getElementById('2410.07588v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NDSS Symposium 2025 Accepted Papers</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05151">arXiv:2410.05151</a> <span> [<a href="https://arxiv.org/pdf/2410.05151">pdf</a>, <a href="https://arxiv.org/format/2410.05151">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Editing Music with Melody and Text: Using ControlNet for Diffusion Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Siyuan Hou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shansong Liu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+R">Ruibin Yuan</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wei Xue</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Y">Ying Shan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+M">Mangsuo Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05151v2-abstract-short" style="display: inline;"> Despite the significant progress in controllable music generation and editing, challenges remain in the quality and length of generated music due to the use of Mel-spectrogram representations and UNet-based model structures. To address these limitations, we propose a novel approach using a Diffusion Transformer (DiT) augmented with an additional control branch using ControlNet. This allows for lon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05151v2-abstract-full').style.display = 'inline'; document.getElementById('2410.05151v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05151v2-abstract-full" style="display: none;"> Despite the significant progress in controllable music generation and editing, challenges remain in the quality and length of generated music due to the use of Mel-spectrogram representations and UNet-based model structures. To address these limitations, we propose a novel approach using a Diffusion Transformer (DiT) augmented with an additional control branch using ControlNet. This allows for long-form and variable-length music generation and editing controlled by text and melody prompts. For more precise and fine-grained melody control, we introduce a novel top-$k$ constant-Q Transform representation as the melody prompt, reducing ambiguity compared to previous representations (e.g., chroma), particularly for music with multiple tracks or a wide range of pitch values. To effectively balance the control signals from text and melody prompts, we adopt a curriculum learning strategy that progressively masks the melody prompt, resulting in a more stable training process. Experiments have been performed on text-to-music generation and music-style transfer tasks using open-source instrumental recording data. 
The results demonstrate that by extending StableAudio, a pre-trained text-controlled DiT model, our approach enables superior melody-controlled editing while retaining good text-to-music generation performance. These results outperform a strong MusicGen baseline in terms of both text-based generation and melody preservation for editing. Audio examples can be found at https://stable-audio-control.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05151v2-abstract-full').style.display = 'none'; document.getElementById('2410.05151v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04878">arXiv:2410.04878</a> <span> [<a href="https://arxiv.org/pdf/2410.04878">pdf</a>, <a href="https://arxiv.org/format/2410.04878">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Grammar Induction for Language Understanding and Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kai%2C+J">Jushi Kai</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shengyuan Hou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yusheng Huang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhouhan Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04878v1-abstract-short" style="display: inline;"> Grammar induction has made significant progress in recent years. However, it is not clear how the application of induced grammar could enhance practical performance in downstream tasks. In this work, we introduce an unsupervised grammar induction method for language understanding and generation. We construct a grammar parser to induce constituency structures and dependency relations, which is simu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04878v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04878v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04878v1-abstract-full" style="display: none;"> Grammar induction has made significant progress in recent years. However, it is not clear how the application of induced grammar could enhance practical performance in downstream tasks. In this work, we introduce an unsupervised grammar induction method for language understanding and generation. 
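<p class="is-size-7">One plausible reading of a "top-$k$ constant-Q transform" melody prompt is to keep only the $k$ strongest CQT bins per frame and zero the rest. The sketch below implements that reading with librosa on a synthetic tone; the paper's exact representation may differ:</p>
<pre><code># Top-k CQT melody prompt, one possible construction: per frame, retain the
# k largest constant-Q magnitudes and suppress everything else.
import numpy as np
import librosa

sr = 22050
y = librosa.tone(440, sr=sr, duration=2.0)     # toy input: a pure A4 tone
C = np.abs(librosa.cqt(y, sr=sr))              # (n_bins, n_frames) magnitudes

def top_k_mask(cqt_mag, k=3):
    out = np.zeros_like(cqt_mag)
    idx = np.argsort(cqt_mag, axis=0)[-k:]     # k strongest bins per frame
    np.put_along_axis(out, idx, np.take_along_axis(cqt_mag, idx, axis=0), axis=0)
    return out

melody_prompt = top_k_mask(C, k=3)
print(melody_prompt.shape, (melody_prompt > 0).sum(axis=0).max())  # at most 3 active bins per frame
</code></pre>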
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04878">arXiv:2410.04878</a> <span> [<a href="https://arxiv.org/pdf/2410.04878">pdf</a>, <a href="https://arxiv.org/format/2410.04878">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div>
<p class="title is-5 mathjax"> Leveraging Grammar Induction for Language Understanding and Generation </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kai%2C+J">Jushi Kai</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shengyuan Hou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yusheng Huang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhouhan Lin</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Grammar induction has made significant progress in recent years. However, it is not clear how the application of induced grammar could enhance practical performance in downstream tasks. In this work, we introduce an unsupervised grammar induction method for language understanding and generation. We construct a grammar parser to induce constituency structures and dependency relations, which is simultaneously trained on downstream tasks without additional syntax annotations. The induced grammar features are subsequently incorporated into the Transformer as a syntactic mask to guide self-attention. We evaluate and apply our method to multiple machine translation tasks and natural language understanding tasks. Our method demonstrates superior performance compared to the original Transformer and other models enhanced with external parsers. Experimental results indicate that our method is effective in both from-scratch and pre-trained scenarios. Additionally, our research highlights the benefit of explicitly modeling the grammatical structure of text for neural network models. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024 Findings</span> </p>
</li>
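<p class="is-size-7">The "syntactic mask to guide self-attention" idea can be illustrated in a few lines: attention scores are blocked wherever the mask says two tokens are syntactically unrelated. The hand-made mask below is only for illustration; the paper induces it with an unsupervised parser:</p>
<pre><code># Masked self-attention gated by a (toy) syntactic mask: 1 = may attend, 0 = blocked.
import torch
import torch.nn.functional as F

def masked_self_attention(x, syn_mask):
    """x: (seq, dim); syn_mask: (seq, seq) binary matrix."""
    scores = x @ x.T / x.shape[-1] ** 0.5
    scores = scores.masked_fill(syn_mask == 0, float("-inf"))
    return F.softmax(scores, dim=-1) @ x

x = torch.randn(4, 8)                        # four token embeddings
# toy constituency: tokens {0,1} form one phrase, tokens {2,3} another
syn_mask = torch.tensor([[1, 1, 0, 0],
                         [1, 1, 0, 0],
                         [0, 0, 1, 1],
                         [0, 0, 1, 1]])
print(masked_self_attention(x, syn_mask).shape)   # torch.Size([4, 8])
</code></pre>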
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00204">arXiv:2410.00204</a> <span> [<a href="https://arxiv.org/pdf/2410.00204">pdf</a>, <a href="https://arxiv.org/format/2410.00204">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div>
<p class="title is-5 mathjax"> OpenAnimals: Revisiting Person Re-Identification for Animals Towards Better Generalization </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Saihui Hou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+P">Panjian Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zengbin Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zeyu Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Man Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongzhen Huang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: This paper addresses the challenge of animal re-identification, an emerging field that shares similarities with person re-identification but presents unique complexities due to the diverse species, environments and poses. To facilitate research in this domain, we introduce OpenAnimals, a flexible and extensible codebase designed specifically for animal re-identification. We conduct a comprehensive study by revisiting several state-of-the-art person re-identification methods, including BoT, AGW, SBS, and MGN, and evaluate their effectiveness on animal re-identification benchmarks such as HyenaID, LeopardID, SeaTurtleID, and WhaleSharkID. Our findings reveal that while some techniques generalize well, many do not, underscoring the significant differences between the two tasks. To bridge this gap, we propose ARBase, a strong base model tailored for Animal Re-identification, which incorporates insights from extensive experiments and introduces simple yet effective animal-oriented designs. Experiments demonstrate that ARBase consistently outperforms existing baselines, achieving state-of-the-art performance across various benchmarks. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
</li>
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14908">arXiv:2409.14908</a> <span> [<a href="https://arxiv.org/pdf/2409.14908">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div>
<p class="title is-5 mathjax"> KARMA: Augmenting Embodied AI Agents with Long-and-short Term Memory Systems </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zixuan Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bo Yu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junzhe Zhao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wenhao Sun</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sai Hou</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+S">Shuai Liang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xing Hu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yinhe Han</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yiming Gan</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Embodied AI agents responsible for executing interconnected, long-sequence household tasks often face difficulties with in-context memory, leading to inefficiencies and errors in task execution. To address this issue, we introduce KARMA, an innovative memory system that integrates long-term and short-term memory modules, enhancing large language models (LLMs) for planning in embodied agents through memory-augmented prompting. KARMA distinguishes between long-term and short-term memory, with long-term memory capturing comprehensive 3D scene graphs as representations of the environment, while short-term memory dynamically records changes in objects' positions and states. This dual-memory structure allows agents to retrieve relevant past scene experiences, thereby improving the accuracy and efficiency of task planning. Short-term memory employs strategies for effective and adaptive memory replacement, ensuring the retention of critical information while discarding less pertinent data. Compared to state-of-the-art embodied agents enhanced with memory, our memory-augmented embodied AI agent improves success rates by 1.3x and 2.3x in Composite Tasks and Complex Tasks within the AI2-THOR simulator, respectively, and enhances task execution efficiency by 3.4x and 62.7x. Furthermore, we demonstrate that KARMA's plug-and-play capability allows for seamless deployment on real-world robotic systems, such as mobile manipulation platforms. Through this plug-and-play memory system, KARMA significantly enhances the ability of embodied agents to generate coherent and contextually appropriate plans, making the execution of complex household tasks more efficient. The experimental videos from the work can be found at https://youtu.be/4BT7fnw9ehs. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
</li>
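<p class="is-size-7">A minimal sketch of the long-/short-term split KARMA describes: stable scene knowledge persists, while volatile object states live in a small store with adaptive eviction (here, a simple least-recently-used rule standing in for the paper's replacement strategies):</p>
<pre><code># Dual memory: long-term holds scene-graph facts, short-term holds recent
# object states and evicts the least-recently-used entry when full.
from collections import OrderedDict

class DualMemory:
    def __init__(self, short_capacity=3):
        self.long_term = {}                        # object -> scene-graph facts
        self.short_term = OrderedDict()            # object -> latest state
        self.short_capacity = short_capacity

    def observe(self, obj, state, facts=None):
        if facts:                                  # stable knowledge persists
            self.long_term.setdefault(obj, []).extend(facts)
        self.short_term[obj] = state               # volatile state is refreshed
        self.short_term.move_to_end(obj)
        if len(self.short_term) > self.short_capacity:
            self.short_term.popitem(last=False)    # evict least recently used

    def retrieve(self, obj):
        return self.long_term.get(obj, []), self.short_term.get(obj)

mem = DualMemory()
mem.observe("mug", {"pos": "table"}, facts=["mug is in kitchen"])
mem.observe("mug", {"pos": "sink"})
print(mem.retrieve("mug"))   # (['mug is in kitchen'], {'pos': 'sink'})
</code></pre>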
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08602">arXiv:2409.08602</a> <span> [<a href="https://arxiv.org/pdf/2409.08602">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1190/geo2020-0865.1">10.1190/geo2020-0865.1</a></span> </div> </div> </div>
<p class="title is-5 mathjax"> Deep learning-based shot-domain seismic deblending </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jing Sun</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Song Hou</a>, <a href="/search/cs?searchtype=author&query=Vinje%2C+V">Vetle Vinje</a>, <a href="/search/cs?searchtype=author&query=Poole%2C+G">Gordon Poole</a>, <a href="/search/cs?searchtype=author&query=Gelius%2C+L">Leiv-J Gelius</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: To streamline fast-track processing of large data volumes, we have developed a deep learning approach to deblend seismic data in the shot domain based on a practical strategy for generating high-quality training data along with a list of data conditioning techniques to improve performance of the data-driven model. We make use of unblended shot gathers acquired at the end of each sail line, access to which requires no additional time or labor cost beyond the blended acquisition. By manually blending these data we obtain training data with good control of the ground truth and fully adapted to the given survey. Furthermore, we train a deep neural network using multi-channel inputs that include adjacent blended shot gathers as additional channels. Prediction of the blending noise is added as a related, auxiliary task alongside the network's main task of predicting the primary-source events. Blending noise in the ground truth is scaled down during the training and validation process due to its excessively strong amplitudes. As part of the process, the to-be-deblended shot gathers are aligned by the blending noise. Implementation on field blended-by-acquisition data demonstrates that introducing the suggested data conditioning steps can considerably reduce the leakage of primary-source events in the deep part of the blended section. The complete proposed approach performs almost as well as a conventional algorithm in the shallow section and shows great advantage in efficiency. It performs slightly worse for larger traveltimes, but still removes the blending noise efficiently. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Geophysics, 2022, vol. 87, no. 3, pp. V215-V226 </p>
</li>
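<p class="is-size-7">The multi-task training objective the abstract outlines (primary-event prediction as the main task, scaled-down blending-noise prediction as the auxiliary task) can be summarized in a short loss sketch; the weights, scale factor, and tensor shapes are illustrative assumptions:</p>
<pre><code># Main + auxiliary deblending loss: one head predicts primary-source events,
# another predicts blending noise against a scaled-down noise target.
import torch
import torch.nn.functional as F

def deblend_loss(pred_primary, pred_noise, primary, noise,
                 noise_scale=0.1, aux_weight=0.5):
    main = F.mse_loss(pred_primary, primary)
    aux = F.mse_loss(pred_noise, noise_scale * noise)   # scaled-down noise target
    return main + aux_weight * aux

shot = (8, 1, 64, 64)     # toy batch of shot gathers (batch, channel, time, trace)
loss = deblend_loss(torch.randn(shot), torch.randn(shot),
                    torch.randn(shot), torch.randn(shot))
print(loss.item())
</code></pre>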
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06171">arXiv:2409.06171</a> <span> [<a href="https://arxiv.org/pdf/2409.06171">pdf</a>, <a href="https://arxiv.org/format/2409.06171">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small" data-tooltip="Robotics">cs.RO</span> </div> </div>
<p class="title is-5 mathjax"> Loss Distillation via Gradient Matching for Point Cloud Completion with Weighted Chamfer Distance </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fangzhou Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haotian Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Haoying Zhou</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Songlin Hou</a>, <a href="/search/cs?searchtype=author&query=Yamada%2C+K+D">Kazunori D Yamada</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+G+S">Gregory S. Fischer</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yanhua Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H+K">Haichong K. Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Ziming Zhang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 3D point clouds enhance a robot's ability to perceive the geometrical information of its environment, enabling many downstream tasks such as grasp pose detection and scene understanding. The performance of these tasks, though, heavily relies on the quality of the data input, as incomplete data can lead to poor results and failure cases. Recent training loss functions designed for deep learning-based point cloud completion, such as the Chamfer distance (CD) and its variants (e.g., HyperCD), imply that a good gradient weighting scheme can significantly boost performance. However, these CD-based loss functions usually require data-related parameter tuning, which can be time-consuming for data-extensive tasks. To address this issue, we aim to find a family of weighted training losses (weighted CD) that requires no parameter tuning. To this end, we propose a search scheme, Loss Distillation via Gradient Matching, to find good candidate loss functions by mimicking the learning behavior in backpropagation between HyperCD and weighted CD. Once this is done, we propose a novel bilevel optimization formula to train the backbone network based on the weighted CD loss. We observe that: (1) with proper weighting functions, the weighted CD can always achieve performance similar to HyperCD, and (2) the Landau weighted CD, namely Landau CD, can outperform HyperCD for point cloud completion and lead to new state-of-the-art results on several benchmark datasets. Our demo code is available at https://github.com/Zhang-VISLab/IROS2024-LossDistillationWeightedCD. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 7 figures, 7 tables; accepted to the IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) 2024</span> </p>
</li>
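<p class="is-size-7">For orientation, a generic weighted Chamfer distance of the kind this family searches over: each nearest-neighbor distance is passed through a weighting function <code>w</code> before averaging. Chamfer conventions vary (sum vs. mean, squared vs. unsquared), and the Landau weighting named in the abstract would be one particular choice of <code>w</code>; the form below is illustrative, not the paper's exact formula:</p>
<pre><code># Weighted Chamfer distance: reweight per-point nearest-neighbor distances
# with a function w, then average both directions.
import torch

def weighted_chamfer(p, q, w=lambda d: d):
    """p: (n, 3), q: (m, 3); w maps squared distances to weighted losses."""
    d = torch.cdist(p, q) ** 2                       # pairwise squared distances
    return w(d.min(dim=1).values).mean() + w(d.min(dim=0).values).mean()

p, q = torch.rand(128, 3), torch.rand(96, 3)
print(weighted_chamfer(p, q).item())                                   # plain CD
print(weighted_chamfer(p, q, w=lambda d: torch.log1p(d)).item())       # a smoother weighting
</code></pre>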
<li class="arxiv-result">
<div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03685">arXiv:2408.03685</a> <span> [<a href="https://arxiv.org/pdf/2408.03685">pdf</a>, <a href="https://arxiv.org/ps/2408.03685">ps</a>, <a href="https://arxiv.org/format/2408.03685">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small" data-tooltip="Systems and Control">eess.SY</span> </div> </div>
<p class="title is-5 mathjax"> RL-ADN: A High-Performance Deep Reinforcement Learning Environment for Optimal Energy Storage Systems Dispatch in Active Distribution Networks </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shengren Hou</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyi Gao</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+W">Weijie Xia</a>, <a href="/search/cs?searchtype=author&query=Duque%2C+E+M+S">Edgar Mauricio Salazar Duque</a>, <a href="/search/cs?searchtype=author&query=Palensky%2C+P">Peter Palensky</a>, <a href="/search/cs?searchtype=author&query=Vergara%2C+P+P">Pedro P. Vergara</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Deep Reinforcement Learning (DRL) presents a promising avenue for optimizing Energy Storage Systems (ESSs) dispatch in distribution networks. This paper introduces RL-ADN, an innovative open-source library specifically designed for solving the optimal ESS dispatch problem in active distribution networks. RL-ADN offers unparalleled flexibility in modeling distribution networks and ESSs, accommodating a wide range of research goals. A standout feature of RL-ADN is its data augmentation module, based on Gaussian Mixture Model and Copula (GMC) functions, which elevates the performance ceiling of DRL agents. Additionally, RL-ADN incorporates the Laurent power flow solver, significantly reducing the computational burden of power flow calculations during training without sacrificing accuracy. The effectiveness of RL-ADN is demonstrated in distribution networks of different sizes, showing marked improvements in the adaptability of DRL algorithms for ESS dispatch tasks. This enhancement stems largely from the increased diversity of training scenarios. Furthermore, RL-ADN achieves a tenfold increase in computational efficiency during training, making it highly suitable for large-scale network applications. The library sets a new benchmark for DRL-based ESS dispatch in distribution networks and is poised to advance DRL applications in distribution network operations significantly. RL-ADN is available at: https://github.com/ShengrenHou/RL-ADN and https://github.com/distributionnetworksTUDelft/RL-ADN. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
</li>
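<p class="is-size-7">To illustrate the kind of environment such a library exposes, here is a toy battery-dispatch environment in the familiar reset/step style, with a made-up price signal. Everything here is an illustrative stand-in; the real library adds network models, power flow, and data augmentation, and its actual API may differ:</p>
<pre><code># Toy ESS dispatch environment: charge when cheap, discharge when expensive.
import numpy as np

class ToyESSDispatch:
    def __init__(self, capacity=10.0, horizon=24, seed=0):
        self.capacity, self.horizon = capacity, horizon
        self.prices = np.random.default_rng(seed).uniform(0.1, 0.5, horizon)

    def reset(self):
        self.t, self.soc = 0, 0.5 * self.capacity
        return np.array([self.soc, self.prices[self.t]])

    def step(self, action):
        power = float(np.clip(action, -2.0, 2.0))           # charge(+) / discharge(-) kW
        new_soc = float(np.clip(self.soc + power, 0.0, self.capacity))
        delivered = new_soc - self.soc                      # power actually absorbed
        self.soc = new_soc
        reward = -self.prices[self.t] * delivered           # pay to charge, earn to discharge
        self.t += 1
        done = self.t >= self.horizon
        obs = np.array([self.soc, self.prices[min(self.t, self.horizon - 1)]])
        return obs, reward, done

env = ToyESSDispatch()
obs, total, done = env.reset(), 0.0, False
while not done:
    obs, r, done = env.step(2.0 if 0.2 > obs[1] else -2.0)  # naive price-threshold policy
    total += r
print(round(total, 3))
</code></pre>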
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08954">arXiv:2407.08954</a> <span> [<a href="https://arxiv.org/pdf/2407.08954">pdf</a>, <a href="https://arxiv.org/format/2407.08954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> PriRoAgg: Achieving Robust Model Aggregation with Minimum Privacy Leakage for Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sizai Hou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Songze Li</a>, <a href="/search/cs?searchtype=author&query=Jahani-Nezhad%2C+T">Tayyebeh Jahani-Nezhad</a>, <a href="/search/cs?searchtype=author&query=Caire%2C+G">Giuseppe Caire</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Federated learning (FL) has recently gained significant momentum due to its potential to leverage large-scale distributed user data while preserving user privacy. However, the typical paradigm of FL faces challenges of both privacy and robustness: the transmitted model updates can potentially leak sensitive user information, and the lack of central control over the local training process leaves the global model susceptible to malicious manipulations of model updates. Current solutions attempting to address both problems under the single-server FL setting fall short in two aspects: 1) they are designed for simple validity checks that are insufficient against advanced attacks (e.g., checking the norm of an individual update); and 2) they allow partial privacy leakage for more complicated robust aggregation algorithms (e.g., the distances between model updates are leaked for multi-Krum). In this work, we formalize a novel security notion of aggregated privacy that characterizes the minimum amount of user information, in the form of aggregated statistics of users' updates, that must be revealed to accomplish more advanced robust aggregation. We develop a general framework, PriRoAgg, utilizing Lagrange coded computing and distributed zero-knowledge proofs, to execute a wide range of robust aggregation algorithms while satisfying aggregated privacy. As concrete instantiations of PriRoAgg, we construct two secure and robust protocols based on state-of-the-art robust algorithms, for which we provide full theoretical analyses of security and complexity. Extensive experiments on these protocols demonstrate their robustness against various model-integrity attacks and their efficiency advantages over baselines. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li>
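<p class="is-size-7">The abstract cites multi-Krum as an example of a robust aggregation rule whose pairwise distances leak information. A minimal NumPy sketch of multi-Krum itself (not of PriRoAgg's secure protocol) is:</p>
<pre><code># Sketch of multi-Krum, the robust aggregation rule the abstract cites as
# leaking pairwise distances; parameter names here are illustrative.
import numpy as np

def multi_krum(updates, f, m):
    """updates: (n, d) client updates; f: tolerated Byzantine clients; m: #selected.
    Requires n - f - 2 &gt;= 1."""
    updates = np.asarray(updates, dtype=float)
    n = len(updates)
    d2 = ((updates[:, None, :] - updates[None, :, :]) ** 2).sum(-1)  # pairwise sq. dists
    scores = []
    for i in range(n):
        nearest = np.sort(np.delete(d2[i], i))[: n - f - 2]  # closest n-f-2 peers
        scores.append(nearest.sum())
    chosen = np.argsort(scores)[:m]
    return updates[chosen].mean(axis=0)
</code></pre>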
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05250">arXiv:2407.05250</a> <span> [<a href="https://arxiv.org/pdf/2407.05250">pdf</a>, <a href="https://arxiv.org/format/2407.05250">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CLIMB: A Benchmark of Clinical Bias in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yubo Zhang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shudi Hou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Muhao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jieyu Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Large language models (LLMs) are increasingly applied to clinical decision-making. However, their potential to exhibit bias poses significant risks to clinical equity. Currently, there is a lack of benchmarks that systematically evaluate such clinical bias in LLMs. While some biases of LLMs can be avoided in downstream tasks, for example by instructing the model to answer "I'm not sure...", the internal bias hidden within the model remains under-studied. We introduce CLIMB (shorthand for A Benchmark of Clinical Bias in Large Language Models), a pioneering comprehensive benchmark to evaluate both intrinsic (within LLMs) and extrinsic (on downstream tasks) bias in LLMs for clinical decision tasks. Notably, for intrinsic bias, we introduce a novel metric, AssocMAD, to assess the disparities of LLMs across multiple demographic groups. Additionally, we leverage counterfactual intervention to evaluate extrinsic bias in the task of clinical diagnosis prediction. Our experiments across popular and medically adapted LLMs, particularly from the Mistral and LLaMA families, reveal prevalent intrinsic and extrinsic bias. This work underscores the critical need to mitigate clinical bias and sets a new standard for future evaluations of LLMs' clinical bias. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li>
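<p class="is-size-7">The abstract does not define AssocMAD precisely; reading the name as a mean absolute deviation of per-group association scores (an assumption on our part), the metric might look like:</p>
<pre><code># The abstract does not define AssocMAD; a natural reading of the name
# (an assumption) is a mean absolute deviation of per-group association scores.
import numpy as np

def assoc_mad(group_scores):
    """group_scores: one association score per demographic group."""
    s = np.asarray(group_scores, dtype=float)
    return np.abs(s - s.mean()).mean()   # 0 iff all groups score identically

print(assoc_mad([0.71, 0.58, 0.66]))     # larger value = larger disparity
</code></pre>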
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02521">arXiv:2407.02521</a> <span> [<a href="https://arxiv.org/pdf/2407.02521">pdf</a>, <a href="https://arxiv.org/format/2407.02521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Performance Comparison of Deep RL Algorithms for Mixed Traffic Cooperative Lane-Changing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xue Yao</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shengren Hou</a>, <a href="/search/cs?searchtype=author&query=Hoogendoorn%2C+S+P">Serge P. Hoogendoorn</a>, <a href="/search/cs?searchtype=author&query=Calvert%2C+S+C">Simeon C. Calvert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Lane-changing (LC) is a challenging scenario for connected and automated vehicles (CAVs) because of the complex dynamics and high uncertainty of the traffic environment. This challenge can be handled by deep reinforcement learning (DRL) approaches, leveraging their data-driven and model-free nature. Our previous work proposed a cooperative lane-changing in mixed traffic (CLCMT) mechanism based on TD3 to facilitate an optimal lane-changing strategy. This study enhances the current CLCMT mechanism by considering both the uncertainty of human-driven vehicles (HVs) and the microscopic interactions between HVs and CAVs. State-of-the-art (SOTA) DRL algorithms, including DDPG, TD3, SAC, and PPO, are utilized to solve the formulated MDP with continuous actions. A performance comparison among the four DRL algorithms demonstrates that the DDPG, TD3, and PPO algorithms can handle uncertainty in traffic environments and learn well-performing LC strategies in terms of safety, efficiency, comfort, and ecology. The PPO algorithm outperforms the other three, with a higher reward, fewer exploration mistakes and crashes, and a more comfortable and ecological LC strategy. These improvements promise the CLCMT mechanism greater advantages in LC motion planning for CAVs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 5 figures, IEEE conference</span> </p> </li>
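<p class="is-size-7">A head-to-head comparison of this kind can be sketched with Stable-Baselines3; since the CLCMT environment is not public, <code>Pendulum-v1</code> stands in below as a generic continuous-action environment.</p>
<pre><code># Sketch of comparing DDPG/TD3/SAC/PPO with Stable-Baselines3; Pendulum-v1
# is a stand-in, as the CLCMT lane-changing environment is not public.
import gymnasium as gym
from stable_baselines3 import DDPG, TD3, SAC, PPO
from stable_baselines3.common.evaluation import evaluate_policy

for algo in (DDPG, TD3, SAC, PPO):
    env = gym.make("Pendulum-v1")              # continuous action space
    model = algo("MlpPolicy", env, verbose=0)
    model.learn(total_timesteps=10_000)        # short run for illustration
    mean_r, std_r = evaluate_policy(model, env, n_eval_episodes=20)
    print(f"{algo.__name__}: {mean_r:.1f} +/- {std_r:.1f}")
</code></pre>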
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09138">arXiv:2405.09138</a> <span> [<a href="https://arxiv.org/pdf/2405.09138">pdf</a>, <a href="https://arxiv.org/format/2405.09138">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OpenGait: A Comprehensive Benchmark Study for Gait Recognition towards Better Practicality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+C">Chao Fan</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Saihui Hou</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Junhao Liang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Chuanfu Shen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jingzhe Ma</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+D">Dongyang Jin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongzhen Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shiqi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax"> Gait recognition, a rapidly advancing vision technology for person identification from a distance, has made significant strides in indoor settings. However, evidence suggests that existing methods often yield unsatisfactory results when applied to newly released real-world gait datasets. Furthermore, conclusions drawn from indoor gait datasets may not easily generalize to outdoor ones. Therefore, the primary goal of this work is to present a comprehensive benchmark study aimed at improving practicality rather than solely focusing on enhancing performance. To this end, we first develop OpenGait, a flexible and efficient gait recognition platform. Using OpenGait as a foundation, we conduct in-depth ablation experiments to revisit recent developments in gait recognition. Surprisingly, we uncover imperfections in certain prior methods, which yield several critical yet previously undiscovered insights. Inspired by these findings, we develop three structurally simple yet empirically powerful and practically robust baseline models, i.e., DeepGaitV2, SkeletonGait, and SkeletonGait++, respectively representing the appearance-based, model-based, and multi-modal methodologies for gait pattern description. Beyond achieving SoTA performance, and more importantly, our careful exploration sheds new light on the modeling experience of deep gait models, the representational capacity of typical gait modalities, and so on. We hope this work can inspire further research and application of gait recognition towards better practicality. The code is available at https://github.com/ShiqiYu/OpenGait. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li>
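<p class="is-size-7">As a rough illustration of the appearance-based methodology that DeepGaitV2 represents (a simplified sketch, not OpenGait's actual code), a silhouette-sequence embedding network with temporal set pooling could be:</p>
<pre><code># Minimal appearance-based gait-embedding sketch (assumption: this mirrors the
# general silhouette pipeline only, not OpenGait's actual implementation).
import torch
import torch.nn as nn

class TinyGaitNet(nn.Module):
    def __init__(self, emb=128):
        super().__init__()
        self.backbone = nn.Sequential(          # per-frame CNN on silhouettes
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1),
        )
        self.head = nn.Linear(64, emb)

    def forward(self, sils):                    # sils: (B, T, 1, H, W)
        B, T = sils.shape[:2]
        f = self.backbone(sils.flatten(0, 1)).flatten(1)   # (B*T, 64)
        f = f.view(B, T, -1).max(dim=1).values             # set pooling over time
        return self.head(f)                                # (B, emb) gait embedding

emb = TinyGaitNet()(torch.rand(2, 30, 1, 64, 44))          # 30-frame silhouette clips
</code></pre>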
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19582">arXiv:2404.19582</a> <span> [<a href="https://arxiv.org/pdf/2404.19582">pdf</a>, <a href="https://arxiv.org/format/2404.19582">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> URVFL: Undetectable Data Reconstruction Attack on Vertical Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+D">Duanyi Yao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Songze Li</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+X">Xueluan Gong</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Sizai Hou</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+G">Gaoning Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Launching effective malicious attacks in vertical federated learning (VFL) presents unique challenges: 1) given the distributed nature of clients' data features and models, each client rigorously guards its privacy and prohibits direct querying, complicating any attempt to steal data; and 2) existing malicious attacks alter the underlying VFL training task and are hence easily detected by comparing the received gradients with those received in honest training. To overcome these challenges, we develop URVFL, a novel attack strategy that evades current detection mechanisms. The key idea is to integrate a discriminator with an auxiliary classifier that takes full advantage of the label information and generates malicious gradients for the victim clients: on one hand, label information helps to better characterize embeddings of samples from distinct classes, yielding improved reconstruction performance; on the other hand, computing malicious gradients with label information better mimics honest training, making the malicious gradients indistinguishable from honest ones and the attack much more stealthy. Our comprehensive experiments demonstrate that URVFL significantly outperforms existing attacks and successfully circumvents SOTA detection methods for malicious attacks. Additional ablation studies and evaluations of defenses further underscore the robustness and effectiveness of URVFL. Our code will be available at https://github.com/duanyiyao/URVFL. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NDSS 2025</span> </p> </li>
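<p class="is-size-7">A heavily simplified PyTorch sketch of the core idea, crafting the gradient returned to a victim from a discriminator with an auxiliary classifier rather than from the honest task loss, is shown below (assumption: the real URVFL training loop is substantially more involved).</p>
<pre><code># Conceptual sketch only (an assumption-laden simplification of the paper):
# the attacker computes the gradient of a discriminator + auxiliary-classifier
# loss w.r.t. the victim's embedding, instead of the honest task loss.
import torch
import torch.nn as nn

emb_dim, n_classes = 32, 10
disc = nn.Sequential(nn.Linear(emb_dim, 64), nn.ReLU())
real_fake = nn.Linear(64, 1)          # discriminator head
aux_cls = nn.Linear(64, n_classes)    # auxiliary classifier head (uses labels)

victim_emb = torch.randn(8, emb_dim, requires_grad=True)  # received from victim
labels = torch.randint(0, n_classes, (8,))

h = disc(victim_emb)
loss = nn.functional.binary_cross_entropy_with_logits(
    real_fake(h).squeeze(1), torch.ones(8)
) + nn.functional.cross_entropy(aux_cls(h), labels)
loss.backward()
malicious_grad = victim_emb.grad      # what the attacker sends back to the victim
</code></pre>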
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19582v2-abstract-full').style.display = 'none'; document.getElementById('2404.19582v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NDSS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17454">arXiv:2404.17454</a> <span> [<a href="https://arxiv.org/pdf/2404.17454">pdf</a>, <a href="https://arxiv.org/format/2404.17454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Domain Adaptive and Fine-grained Anomaly Detection for Single-cell Sequencing Data and Beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kaichen Xu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yueyang Ding</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Suyang Hou</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+W">Weiqiang Zhan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+N">Nisang Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xiaobo Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17454v2-abstract-short" style="display: inline;"> Fined-grained anomalous cell detection from affected tissues is critical for clinical diagnosis and pathological research. Single-cell sequencing data provide unprecedented opportunities for this task. However, current anomaly detection methods struggle to handle domain shifts prevalent in multi-sample and multi-domain single-cell sequencing data, leading to suboptimal performance. Moreover, these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17454v2-abstract-full').style.display = 'inline'; document.getElementById('2404.17454v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17454v2-abstract-full" style="display: none;"> Fined-grained anomalous cell detection from affected tissues is critical for clinical diagnosis and pathological research. Single-cell sequencing data provide unprecedented opportunities for this task. However, current anomaly detection methods struggle to handle domain shifts prevalent in multi-sample and multi-domain single-cell sequencing data, leading to suboptimal performance. Moreover, these methods fall short of distinguishing anomalous cells into pathologically distinct subtypes. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14248">arXiv:2404.14248</a> <span> [<a href="https://arxiv.org/pdf/2404.14248">pdf</a>, <a href="https://arxiv.org/format/2404.14248">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NTIRE 2024 Challenge on Low Light Image Enhancement: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoning Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zongwei Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Ao Li</a>, <a href="/search/cs?searchtype=author&query=Vasluianu%2C+F">Florin-Alexandru Vasluianu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yulun Zhang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+S">Shuhang Gu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Le Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+C">Ce Zhu</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zhi Jin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Hongjun Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chenxi Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+H">Haitao Ling</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yuanhao Cai</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+H">Hao Bian</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yuxin Zheng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jing Lin</a>, <a href="/search/cs?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+B">Ben
Shao</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jin Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianli Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Mohao Wu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yixu Feng</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuo Hou</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Haotian Lin</a> , et al. (87 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper reviews the NTIRE 2024 low light image enhancement challenge, highlighting the proposed solutions and results. The aim of this challenge is to discover an effective network design or solution capable of generating brighter, clearer, and visually appealing results when dealing with a variety of conditions, including ultra-high resolution (4K and beyond), non-uniform illumination, backlighting, extreme darkness, and night scenes. A notable total of 428 participants registered for the challenge, with 22 teams ultimately making valid submissions. This paper meticulously evaluates the state-of-the-art advancements in enhancing low-light images, reflecting the significant progress and creativity in this field. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NTIRE 2024 Challenge Report</span> </p> </li>
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NTIRE 2024 Challenge Report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13146">arXiv:2404.13146</a> <span> [<a href="https://arxiv.org/pdf/2404.13146">pdf</a>, <a href="https://arxiv.org/format/2404.13146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DeepFake-O-Meter v2.0: An Open Platform for DeepFake Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ju%2C+Y">Yan Ju</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Chengzhe Sun</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+S">Shan Jia</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuwei Hou</a>, <a href="/search/cs?searchtype=author&query=Si%2C+Z">Zhaofeng Si</a>, <a href="/search/cs?searchtype=author&query=Datta%2C+S+K">Soumyya Kanti Datta</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+L">Lipeng Ke</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+R">Riky Zhou</a>, <a href="/search/cs?searchtype=author&query=Nikolich%2C+A">Anita Nikolich</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Siwei Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13146v2-abstract-short" style="display: inline;"> Deepfakes, as AI-generated media, have increasingly threatened media integrity and personal privacy with realistic yet fake digital content. In this work, we introduce an open-source and user-friendly online platform, DeepFake-O-Meter v2.0, that integrates state-of-the-art methods for detecting Deepfake images, videos, and audio. Built upon DeepFake-O-Meter v1.0, we have made significant upgrades… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13146v2-abstract-full').style.display = 'inline'; document.getElementById('2404.13146v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13146v2-abstract-full" style="display: none;"> Deepfakes, as AI-generated media, have increasingly threatened media integrity and personal privacy with realistic yet fake digital content. In this work, we introduce an open-source and user-friendly online platform, DeepFake-O-Meter v2.0, that integrates state-of-the-art methods for detecting Deepfake images, videos, and audio. Built upon DeepFake-O-Meter v1.0, we have made significant upgrades and improvements in platform architecture design, including user interaction, detector integration, job balancing, and security management. The platform aims to offer everyday users a convenient service for analyzing DeepFake media using multiple state-of-the-art detection algorithms. It ensures secure and private delivery of the analysis results. Furthermore, it serves as an evaluation and benchmarking platform for researchers in digital media forensics to compare the performance of multiple algorithms on the same input. 
We have also conducted a detailed usage analysis based on the collected data to gain deeper insights into the platform's usage statistics, analyzing two-month trends in user activity and evaluating the processing efficiency of each detector. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09499">arXiv:2404.09499</a> <span> [<a href="https://arxiv.org/pdf/2404.09499">pdf</a>, <a href="https://arxiv.org/format/2404.09499">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Learning Human Motion from Monocular Videos via Cross-Modal Manifold Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuaiying Hou</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+H">Hongyu Tao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+J">Junheng Fang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+C">Changqing Zou</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+H">Hujun Bao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Weiwei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Learning 3D human motion from 2D inputs is a fundamental task in the realms of computer vision and computer graphics. Many previous methods grapple with this inherently ambiguous task by introducing motion priors into the learning process. However, these approaches face difficulties in defining the complete configurations of such priors or in training a robust model. In this paper, we present the Video-to-Motion Generator (VTM), which leverages motion priors through cross-modal latent feature space alignment between 3D human motion and 2D inputs, namely videos and 2D keypoints. To reduce the complexity of modeling motion priors, we model the motion data separately for the upper and lower body parts. Additionally, we align the motion data with a scale-invariant virtual skeleton to mitigate the interference of human skeleton variations with the motion priors. Evaluated on AIST++, VTM showcases state-of-the-art performance in reconstructing 3D human motion from monocular videos. Notably, our VTM exhibits the capability to generalize to unseen view angles and in-the-wild videos. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li>
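<p class="is-size-7">Cross-modal latent alignment of the kind VTM describes can be illustrated with a toy cosine-alignment loss between paired encoders (a sketch under simplifying assumptions; the paper's encoders and upper/lower-body split are more elaborate).</p>
<pre><code># Toy sketch of cross-modal latent alignment (assumption: VTM's actual
# encoders, losses, and body-part split are more elaborate than this).
import torch
import torch.nn as nn

video_enc = nn.Linear(512, 64)   # stand-in for a 2D keypoint/video encoder
motion_enc = nn.Linear(263, 64)  # stand-in for a 3D motion encoder

video_feat = torch.randn(16, 512)    # paired batch: video features ...
motion_feat = torch.randn(16, 263)   # ... and 3D motion features

z_v = nn.functional.normalize(video_enc(video_feat), dim=-1)
z_m = nn.functional.normalize(motion_enc(motion_feat), dim=-1)
align_loss = (1 - (z_v * z_m).sum(-1)).mean()   # pull paired latents together
</code></pre>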
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06107">arXiv:2404.06107</a> <span> [<a href="https://arxiv.org/pdf/2404.06107">pdf</a>, <a href="https://arxiv.org/format/2404.06107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Necessity of Visual Modality in Multimodal Machine Translation using Authentic Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Long%2C+Z">Zi Long</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenhao Tang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+X">Xianghua Fu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shilong Hou</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+J">Jinze Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Recent research in the field of multimodal machine translation (MMT) has indicated that the visual modality is either dispensable or offers only marginal advantages. However, most of these conclusions are drawn from the analysis of experimental results based on a limited set of bilingual sentence-image pairs, such as Multi30k. In these kinds of datasets, the content of one bilingual parallel sentence pair must be well represented by a manually annotated image, which differs from the real-world translation scenario. In this work, we adhere to the universal multimodal machine translation framework proposed by Tang et al. (2022). This approach allows us to delve into the impact of the visual modality on translation efficacy by leveraging real-world translation datasets. Through a comprehensive exploration via probing tasks, we find that the visual modality proves advantageous for the majority of authentic translation datasets. Notably, translation performance primarily hinges on the alignment and coherence between textual and visual contents. Furthermore, our results suggest that visual information serves a supplementary role in multimodal translation and can, in some cases, be substituted. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BUCC 2024 accepted</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08820">arXiv:2403.08820</a> <span> [<a href="https://arxiv.org/pdf/2403.08820">pdf</a>, <a href="https://arxiv.org/format/2403.08820">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Diet-ODIN: A Novel Framework for Opioid Misuse Detection with Interpretable Dietary Patterns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zheyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zehong Wang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shifu Hou</a>, <a href="/search/cs?searchtype=author&query=Hall%2C+E">Evan Hall</a>, <a href="/search/cs?searchtype=author&query=Bachman%2C+L">Landon Bachman</a>, <a href="/search/cs?searchtype=author&query=Galassi%2C+V">Vincent Galassi</a>, <a href="/search/cs?searchtype=author&query=White%2C+J">Jasmine White</a>, <a href="/search/cs?searchtype=author&query=Chawla%2C+N+V">Nitesh V. Chawla</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chuxu Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yanfang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax"> The opioid crisis has been one of the most critical societal concerns in the United States. Although medication-assisted treatment (MAT) is recognized as the most effective treatment for opioid misuse and addiction, its various side effects can trigger opioid relapse. In addition to MAT, dietary nutrition intervention has demonstrated its importance in opioid misuse prevention and recovery. However, research on the alarming connections between dietary patterns and opioid misuse remains under-explored. In response to this gap, in this paper we first establish, as a first attempt, a large-scale multifaceted dietary benchmark dataset related to opioid users, and then develop a novel framework, Opioid Misuse Detection with Interpretable Dietary Patterns (Diet-ODIN), that bridges a heterogeneous graph (HG) and a large language model (LLM) for identifying users with opioid misuse and interpreting their associated dietary patterns. Specifically, in Diet-ODIN we first construct an HG to comprehensively incorporate both dietary and health-related information, and then devise a holistic graph learning framework with noise reduction to fully capitalize on both users' individual dietary habits and shared dietary patterns for detecting users with opioid misuse. To further delve into the intricate correlations between dietary patterns and opioid misuse, we exploit an LLM, utilizing the knowledge obtained from the graph learning model for interpretation. Extensive experimental results on our established benchmark, with quantitative and qualitative measures, demonstrate the outstanding performance of Diet-ODIN in exploring the complex interplay between opioid misuse and dietary patterns, in comparison with state-of-the-art baseline methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li>
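<p class="is-size-7">The heterogeneous-graph construction step can be sketched with PyTorch Geometric's <code>HeteroData</code>; the node and edge types below are illustrative guesses, not Diet-ODIN's actual schema.</p>
<pre><code># Sketch of heterogeneous-graph construction with PyTorch Geometric; the
# "user"/"food" node types and "consumes" relation are illustrative guesses,
# not Diet-ODIN's actual schema.
import torch
from torch_geometric.data import HeteroData

data = HeteroData()
data["user"].x = torch.randn(100, 16)    # user features (e.g., health records)
data["food"].x = torch.randn(500, 8)     # food/nutrition features
edge = torch.stack([
    torch.randint(0, 100, (2000,)),      # row 0: user ids
    torch.randint(0, 500, (2000,)),      # row 1: food ids
])
data["user", "consumes", "food"].edge_index = edge
</code></pre>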
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span> [<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a> , et al. (1112 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; and (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state of the art in long-document QA, long-video QA, and long-context ASR, and match or surpass Gemini 1.0 Ultra's state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5's long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (>99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks, achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04444">arXiv:2403.04444</a> <span> [<a href="https://arxiv.org/pdf/2403.04444">pdf</a>, <a href="https://arxiv.org/format/2403.04444">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Disentangled Diffusion-Based 3D Human Pose Estimation with Hierarchical Spatial and Temporal Denoiser </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+Q">Qingyuan Cai</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xuecai Hu</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Saihui Hou</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+L">Li Yao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongzhen Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Recently, diffusion-based methods for monocular 3D human pose estimation have achieved state-of-the-art (SOTA) performance by directly regressing the 3D joint coordinates from the 2D pose sequence. Although some methods decompose the task into bone-length and bone-direction prediction based on the human anatomical skeleton, to explicitly incorporate more human-body prior constraints, the performance of these methods is significantly lower than that of the SOTA diffusion-based methods. This can be attributed to the tree structure of the human skeleton: direct application of the disentangled method can amplify the accumulation of hierarchical errors, which propagate through each level of the hierarchy. Meanwhile, hierarchical information has not been fully explored by previous methods. To address these problems, we propose a Disentangled Diffusion-based 3D Human Pose Estimation method with a Hierarchical Spatial and Temporal Denoiser, termed DDHPose. In our approach: (1) we disentangle the 3D pose and diffuse the bone length and bone direction during the forward process of the diffusion model to effectively model the human pose prior, and propose a disentanglement loss to supervise diffusion model learning; (2) for the reverse process, we propose a Hierarchical Spatial and Temporal Denoiser (HSTDenoiser) to improve the hierarchical modeling of each joint. Our HSTDenoiser comprises two components: the Hierarchical-Related Spatial Transformer (HRST) and the Hierarchical-Related Temporal Transformer (HRTT). HRST exploits joint spatial information and the influence of the parent joint on each joint for spatial modeling, while HRTT utilizes information from both the joint and its hierarchically adjacent joints to explore hierarchical temporal correlations among joints. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI24</span> </p> </li>
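<p class="is-size-7">The bone length/direction disentanglement is the one step concrete enough to sketch from the abstract alone; the parent table below assumes a standard 17-joint skeleton, and the diffusion machinery is omitted.</p>
<pre><code># Sketch of the bone length/direction disentanglement the abstract describes;
# the parent table is a standard 17-joint skeleton (an assumption), and the
# diffusion forward/reverse processes are omitted.
import torch

# PARENT[j] = index of joint j's parent (-1 for the pelvis root), H36M-style
PARENT = torch.tensor([-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15])

def disentangle(pose):
    """pose: (J, 3) joint coordinates -> (bone lengths, unit bone directions)."""
    child = torch.arange(1, pose.shape[0])
    bones = pose[child] - pose[PARENT[child]]      # (J-1, 3) bone vectors
    length = bones.norm(dim=-1, keepdim=True)      # quantity diffused in the forward pass
    direction = bones / length.clamp(min=1e-8)
    return length, direction

length, direction = disentangle(torch.randn(17, 3))
</code></pre>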
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03063">arXiv:2403.03063</a> <span> [<a href="https://arxiv.org/pdf/2403.03063">pdf</a>, <a href="https://arxiv.org/format/2403.03063">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICRA57147.2024.10611660">10.1109/ICRA57147.2024.10611660 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> CrackNex: a Few-shot Low-light Crack Segmentation Model Based on Retinex Theory for UAV Inspections </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+Z">Zhen Yao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shuhang Hou</a>, <a href="/search/cs?searchtype=author&query=Chuah%2C+M+C">Mooi Choo Chuah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Routine visual inspections of concrete structures are imperative for upholding the safety and integrity of critical infrastructure. Such visual inspections sometimes happen under low-light conditions, e.g., when checking bridge health. Crack segmentation under such conditions is challenging due to the poor contrast between cracks and their surroundings. However, most deep learning methods are designed for well-illuminated crack images, and hence their performance drops dramatically in low-light scenes. In addition, conventional approaches require many annotated low-light crack images, which are time-consuming to collect. In this paper, we address these challenges by proposing CrackNex, a framework that utilizes reflectance information based on Retinex theory to help the model learn a unified, illumination-invariant representation. Furthermore, we utilize few-shot segmentation to address the scarcity of training data. In CrackNex, both a support prototype and a reflectance prototype are extracted from the support set, and a prototype fusion module is designed to integrate the features from both prototypes. CrackNex outperforms the SOTA methods on multiple datasets. Additionally, we present the first benchmark dataset, LCSD, for low-light crack segmentation. LCSD consists of 102 well-illuminated crack images and 41 low-light crack images. The dataset and code are available at https://github.com/zy1296/CrackNex. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures, IEEE International Conference on Robotics and Automation (ICRA) 2024</span> </p> </li>
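<p class="is-size-7">Retinex theory models an image as reflectance times illumination. A crude single-scale decomposition conveys where the reflectance input comes from (assumption: CrackNex estimates reflectance with a learned module, not this simple filter).</p>
<pre><code># Single-scale Retinex: model the image as I = R * L and recover
# log-reflectance by subtracting a smoothed illumination estimate.
# (Assumption: CrackNex's reflectance estimator is learned, not this filter.)
import numpy as np
from scipy.ndimage import gaussian_filter

def single_scale_retinex(img, sigma=25.0):
    """img: float array in (0, 1]; returns log R = log I - log(I * G_sigma)."""
    img = np.clip(img, 1e-4, 1.0)
    illumination = gaussian_filter(img, sigma)        # smooth estimate of L
    return np.log(img) - np.log(np.clip(illumination, 1e-4, 1.0))

reflectance = single_scale_retinex(np.random.rand(256, 256))
</code></pre>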
arXiv:2403.00862 [pdf, other] cs.CL cs.AI
https://arxiv.org/abs/2403.00862
NewsBench: A Systematic Evaluation Framework for Assessing Editorial Capabilities of Large Language Models in Chinese Journalism
Authors: Miao Li, Ming-Bin Chen, Bo Tang, Shengbin Hou, Pengyu Wang, Haiying Deng, Zhiyu Li, Feiyu Xiong, Keming Mao, Peng Cheng, Yi Luo
Abstract: We present NewsBench, a novel evaluation framework to systematically assess the editorial capabilities of Large Language Models (LLMs) in Chinese journalism. Our benchmark dataset focuses on four facets of writing proficiency and six facets of safety adherence, and comprises 1,267 carefully and manually designed test samples, in the form of multiple-choice and short-answer questions, for five editorial tasks in 24 news domains. To measure performance, we propose GPT-4-based automatic evaluation protocols that assess LLM generations for short-answer questions in terms of writing proficiency and safety adherence; both protocols are validated by high correlations with human evaluations. Based on this systematic evaluation framework, we conduct a comprehensive analysis of ten popular LLMs that can handle Chinese. The experimental results highlight GPT-4 and ERNIE Bot as top performers, yet reveal a relative deficiency in journalistic safety adherence in creative writing tasks. Our findings also underscore the need for enhanced ethical guidance in machine-generated journalistic content, marking a step forward in aligning LLMs with journalistic standards and safety considerations.
Submitted 4 June, 2024; v1 submitted 29 February, 2024; originally announced March 2024.
Comments: Long paper, ACL 2024 Main

arXiv:2401.13531 [pdf, other] cs.CV
https://arxiv.org/abs/2401.13531
QAGait: Revisit Gait Recognition from a Quality Perspective
Authors: Zengbin Wang, Saihui Hou, Man Zhang, Xu Liu, Chunshui Cao, Yongzhen Huang, Peipei Li, Shibiao Xu
Abstract: Gait recognition is a promising biometric method that aims to identify pedestrians from their unique walking patterns. The silhouette modality, renowned for its easy acquisition, simple structure, sparse representation, and convenient modeling, has been widely employed in controlled in-the-lab research. However, as gait recognition rapidly advances from in-the-lab to in-the-wild scenarios, various conditions raise significant challenges for the silhouette modality, including 1) unidentifiable low-quality silhouettes (abnormal segmentation, severe occlusion, or even non-human shapes), and 2) identifiable but challenging silhouettes (background noise, non-standard posture, slight occlusion). To address these challenges, we revisit the gait recognition pipeline and approach gait recognition from a quality perspective, namely QAGait. Specifically, we propose a series of cost-effective quality assessment strategies, including Maximal Connect Area and Template Match to eliminate background noise and unidentifiable silhouettes, and an Alignment strategy to handle non-standard postures. We also propose two quality-aware loss functions to integrate silhouette quality into optimization within the embedding space. Extensive experiments demonstrate that QAGait can guarantee both gait reliability and performance enhancement. Furthermore, our quality assessment strategies can seamlessly integrate with existing gait datasets, showcasing our superiority. Code is available at https://github.com/wzb-bupt/QAGait.
Submitted 24 January, 2024; originally announced January 2024.
Comments: Accepted by AAAI 2024

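The Maximal Connect Area idea maps naturally onto standard connected-component analysis. The sketch below is a guess at the general shape of such a filter, with an assumed area threshold (min_ratio) rather than the paper's calibrated one.

```python
import numpy as np
from scipy import ndimage

def maximal_connect_area(silhouette: np.ndarray, min_ratio: float = 0.01):
    """Keep only the largest connected foreground component of a binary
    silhouette; reject the frame entirely if that component is implausibly
    small (likely an abnormal segmentation)."""
    fg = silhouette > 0
    labels, n = ndimage.label(fg)
    if n == 0:
        return None
    sizes = ndimage.sum(fg, labels, range(1, n + 1))
    largest = (labels == (np.argmax(sizes) + 1)).astype(np.uint8)
    if largest.sum() < min_ratio * silhouette.size:
        return None  # unidentifiable silhouette, drop it
    return largest
```
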
arXiv:2401.03851 [pdf, other] cs.CV q-bio.NC
https://arxiv.org/abs/2401.03851
Aligned with LLM: a new multi-modal training paradigm for encoding fMRI activity in visual cortex
Authors: Shuxiao Ma, Linyuan Wang, Senbao Hou, Bin Yan
Abstract: Recently, there has been a surge in the popularity of pre-trained large language models (LLMs) (such as GPT-4), sweeping across the entire Natural Language Processing (NLP) and Computer Vision (CV) communities. These LLMs have demonstrated advanced multi-modal understanding capabilities and showcased strong performance across various benchmarks. LLMs have started to embody traits of artificial general intelligence, which holds vital guidance for enhancing brain-like characteristics within visual encoding models. Hence, this paper proposes a new multi-modal training paradigm, aligned with LLMs, for encoding fMRI activity in visual cortex. Based on this paradigm, we train a visual encoding model on fMRI data, named the LLM-Visual Encoding Model (LLM-VEM). Specifically, we utilize an LLM (miniGPT4) to generate descriptive text for all stimulus images, forming a high-quality textual description set. Moreover, we use a pre-trained text encoder (CLIP) to process these detailed descriptions, obtaining text embedding features. Next, we use a contrastive loss function to minimize the distance between the image embedding features and the text embedding features, completing the alignment of the stimulus image and text information. With the assistance of the pre-trained LLM, this alignment process facilitates better learning of the visual encoding model, resulting in higher precision. The final experimental results indicate that our training paradigm significantly enhances the performance of the visual encoding model.
Submitted 8 January, 2024; originally announced January 2024.

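The alignment objective described here, pulling image embedding features toward the text embeddings of their own captions, is typically realized as a symmetric contrastive (InfoNCE) loss in CLIP-style training. A minimal sketch of that standard form, not necessarily the paper's exact loss:

```python
import torch
import torch.nn.functional as F

def alignment_loss(image_emb: torch.Tensor, text_emb: torch.Tensor,
                   temperature: float = 0.07) -> torch.Tensor:
    """Symmetric InfoNCE: each image embedding is pulled toward the text
    embedding of its own stimulus caption and pushed away from the other
    captions in the batch (and vice versa)."""
    image_emb = F.normalize(image_emb, dim=-1)
    text_emb = F.normalize(text_emb, dim=-1)
    logits = image_emb @ text_emb.t() / temperature
    targets = torch.arange(len(logits), device=logits.device)
    return (F.cross_entropy(logits, targets) +
            F.cross_entropy(logits.t(), targets)) / 2
```
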
arXiv:2312.11805 [pdf, other] cs.CL cs.AI cs.CV
https://arxiv.org/abs/2312.11805
Gemini: A Family of Highly Capable Multimodal Models
Authors: Gemini Team, Rohan Anil, Sebastian Borgeaud, Jean-Baptiste Alayrac, Jiahui Yu, Radu Soricut, Johan Schalkwyk, Andrew M. Dai, Anja Hauth, Katie Millican, David Silver, Melvin Johnson, Ioannis Antonoglou, Julian Schrittwieser, Amelia Glaese, Jilin Chen, Emily Pitler, Timothy Lillicrap, Angeliki Lazaridou, Orhan Firat, James Molloy, Michael Isard, Paul R. Barham, Tom Hennigan, Benjamin Lee, et al. (1325 additional authors not shown)
Abstract: This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI.
Submitted 17 June, 2024; v1 submitted 18 December, 2023; originally announced December 2023.

arXiv:2312.10661 [pdf, other] cs.IR cs.AI
https://arxiv.org/abs/2312.10661
Wikiformer: Pre-training with Structured Information of Wikipedia for Ad-hoc Retrieval
Authors: Weihang Su, Qingyao Ai, Xiangsheng Li, Jia Chen, Yiqun Liu, Xiaolong Wu, Shengluan Hou
Abstract: With the development of deep learning and natural language processing techniques, pre-trained language models have been widely used to solve information retrieval (IR) problems. Benefiting from the pre-training and fine-tuning paradigm, these models achieve state-of-the-art performance. Previous work has made wide use of Wikipedia's plain text in the pre-training stage. However, the rich structured information in Wikipedia, such as titles, abstracts, the hierarchical heading (multi-level title) structure, relationships between articles, references, hyperlink structures, and writing organization, has not been fully explored. In this paper, we devise four pre-training objectives tailored for IR tasks based on the structured knowledge of Wikipedia. Compared to existing pre-training methods, our approach can better capture the semantic knowledge in the training corpus by leveraging the human-edited structured data from Wikipedia. Experimental results on multiple IR benchmark datasets show the superior performance of our model in both zero-shot and fine-tuning settings compared to existing strong retrieval baselines. In addition, experimental results in the biomedical and legal domains demonstrate that our approach achieves better performance in vertical domains compared to previous models, especially in scenarios where long-text similarity matching is needed.
Submitted 1 January, 2024; v1 submitted 17 December, 2023; originally announced December 2023.
Comments: Thirty-Eighth AAAI Conference on Artificial Intelligence (AAAI-24)

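The abstract does not spell out the four objectives, but it is easy to picture how structured Wikipedia data yields IR-style training pairs. The sketch below is purely speculative: it treats a heading path as a pseudo-query and its section text as the relevant document, and the assumed article schema is invented for illustration.

```python
import random

def heading_section_pairs(article: dict):
    """Build pseudo query-document pairs from one Wikipedia article: the
    title plus a section's heading path acts as the query, the section text
    as the positive document, and another section as a negative."""
    # Assumed schema: {"title": str, "sections": [{"heading_path": [...], "text": str}]}
    sections = article["sections"]
    pairs = []
    if len(sections) < 2:
        return pairs  # need at least one other section to sample a negative
    for i, sec in enumerate(sections):
        query = article["title"] + " " + " ".join(sec["heading_path"])
        j = random.choice([k for k in range(len(sections)) if k != i])
        pairs.append({"query": query,
                      "positive": sec["text"],
                      "negative": sections[j]["text"]})
    return pairs
```
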
arXiv:2310.16419 [pdf] cs.AI
https://arxiv.org/abs/2310.16419
Open Knowledge Base Canonicalization with Multi-task Unlearning
Authors: Bingchen Liu, Shihao Hou, Weixin Zeng, Xiang Zhao, Shijun Liu, Li Pan
Abstract: The construction of large open knowledge bases (OKBs) is integral to many applications in the field of mobile computing. Noun phrases and relational phrases in OKBs often suffer from redundancy and ambiguity, which calls for investigation into OKB canonicalization. However, in order to meet the requirements of privacy protection regulations and to ensure the timeliness of the data, a canonicalized OKB often needs to remove sensitive information or outdated data. Machine unlearning in OKB canonicalization is an excellent solution to this problem. Current solutions address OKB canonicalization by devising advanced clustering algorithms and using knowledge graph embedding (KGE) to further facilitate the canonicalization process. Effective schemes are urgently needed to fully synergise machine unlearning with clustering and KGE learning. To this end, we put forward a multi-task unlearning framework, namely MulCanon, to tackle the machine unlearning problem in OKB canonicalization. Specifically, the noise characteristics of the diffusion model are utilized to achieve the effect of machine unlearning for data in the OKB. MulCanon unifies the learning objectives of the diffusion model, KGE, and clustering algorithms, and adopts a two-step multi-task learning paradigm for training. A thorough experimental study on popular OKB canonicalization datasets validates that MulCanon achieves advanced machine unlearning effects.
Submitted 25 October, 2023; originally announced October 2023.

arXiv:2310.12389 [pdf, other] cs.NI quant-ph
https://arxiv.org/abs/2310.12389
Quantum Computing for MIMO Beam Selection Problem: Model and Optical Experimental Solution
Authors: Yuhong Huang, Wenxin Li, Chengkang Pan, Shuai Hou, Xian Lu, Chunfeng Cui, Jingwei Wen, Jiaqi Xu, Chongyu Cao, Yin Ma, Hai Wei, Kai Wen
Abstract: Massive multiple-input multiple-output (MIMO) has gained widespread popularity in recent years due to its ability to increase data rates, improve signal quality, and provide better coverage in challenging environments. In this paper, we investigate the MIMO beam selection (MBS) problem, which is proven to be NP-hard and computationally intractable. To deal with this problem, we consider quantum computing, which can provide faster and more efficient solutions to large-scale combinatorial optimization. MBS is formulated in quadratic unconstrained binary optimization (QUBO) form and solved on a Coherent Ising Machine (CIM) physical machine. We compare the performance of our solution with two classic heuristics, simulated annealing and Tabu search. The results demonstrate an average performance improvement by a factor of 261.23 and 20.6, respectively, which shows that the CIM-based solution performs significantly better in selecting the optimal subset of beams. This work shows great promise for practical 5G operation and promotes the application of quantum computing in solving computationally hard problems in communication.
Submitted 29 October, 2023; v1 submitted 18 October, 2023; originally announced October 2023.
Comments: Accepted by IEEE Globecom 2023

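A QUBO instance, minimizing x^T Q x over binary x, is exactly the form both the CIM and the classical baselines consume. As a hedged illustration of the simulated annealing baseline only (the construction of Q from beam measurements is the paper's contribution and is not reproduced here):

```python
import numpy as np

def solve_qubo_sa(Q: np.ndarray, steps: int = 20000,
                  t0: float = 2.0, t1: float = 0.01, seed: int = 0):
    """Simulated-annealing baseline for min x^T Q x, x in {0,1}^n.
    Q is assumed symmetric; t0/t1 set a geometric cooling schedule."""
    rng = np.random.default_rng(seed)
    n = Q.shape[0]
    x = rng.integers(0, 2, n)
    energy = x @ Q @ x
    for step in range(steps):
        t = t0 * (t1 / t0) ** (step / steps)
        i = rng.integers(n)
        # Exact energy change from flipping bit i:
        # dE = (1 - 2*x_i) * (Q_ii + 2 * sum_{k != i} Q_ik x_k)
        delta = (1 - 2 * x[i]) * (Q[i, i] + 2 * (Q[i] @ x - Q[i, i] * x[i]))
        if delta <= 0 or rng.random() < np.exp(-delta / t):
            x[i] ^= 1
            energy += delta
    return x, energy
```
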
arXiv:2310.04992 [pdf, other] eess.IV cs.CV
https://arxiv.org/abs/2310.04992
VisionFM: a Multi-Modal Multi-Task Vision Foundation Model for Generalist Ophthalmic Artificial Intelligence
Authors: Jianing Qiu, Jian Wu, Hao Wei, Peilun Shi, Minqing Zhang, Yunyun Sun, Lin Li, Hanruo Liu, Hongyi Liu, Simeng Hou, Yuyang Zhao, Xuehui Shi, Junfang Xian, Xiaoxia Qu, Sirui Zhu, Lijie Pan, Xiaoniao Chen, Xiaojia Zhang, Shuai Jiang, Kebing Wang, Chenlong Yang, Mingqiang Chen, Sujie Fan, Jianhua Hu, Aiguo Lv, et al. (17 additional authors not shown)
Abstract: We present VisionFM, a foundation model pre-trained with 3.4 million ophthalmic images from 560,457 individuals, covering a broad range of ophthalmic diseases, modalities, imaging devices, and demographics. After pre-training, VisionFM provides a foundation to foster multiple ophthalmic artificial intelligence (AI) applications, such as disease screening and diagnosis, disease prognosis, subclassification of disease phenotypes, and systemic biomarker and disease prediction, with each application enhanced with expert-level intelligence and accuracy. The generalist intelligence of VisionFM outperformed ophthalmologists of basic and intermediate levels in jointly diagnosing 12 common ophthalmic diseases. Evaluated on a new large-scale ophthalmic disease diagnosis benchmark database, as well as a new large-scale segmentation and detection benchmark database, VisionFM outperformed strong baseline deep neural networks. The ophthalmic image representations learned by VisionFM exhibited noteworthy explainability and demonstrated strong generalizability to new ophthalmic modalities, disease spectra, and imaging devices. As a foundation model, VisionFM has a large capacity to learn from diverse ophthalmic imaging data and disparate datasets. To be commensurate with this capacity, in addition to the real data used for pre-training, we also generated and leveraged synthetic ophthalmic imaging data. Experimental results revealed that synthetic data that passed visual Turing tests can also enhance the representation learning capability of VisionFM, leading to substantial performance gains on downstream ophthalmic AI tasks. Beyond the ophthalmic AI applications developed, validated, and demonstrated in this work, substantial further applications can be achieved in an efficient and cost-effective manner using VisionFM as the foundation.
Submitted 7 October, 2023; originally announced October 2023.

arXiv:2309.08251 [pdf, other] cs.CV
https://arxiv.org/abs/2309.08251
CartoonDiff: Training-free Cartoon Image Generation with Diffusion Transformer Models
Authors: Feihong He, Gang Li, Lingyu Si, Leilei Yan, Shimeng Hou, Hongwei Dong, Fanzhang Li
Abstract: Image cartoonization has attracted significant interest in the field of image generation. However, most existing image cartoonization techniques require re-training models on images of cartoon style. In this paper, we present CartoonDiff, a novel training-free sampling approach that generates cartoonized images using diffusion transformer models. Specifically, we decompose the reverse process of diffusion models into a semantic generation phase and a detail generation phase. Furthermore, we implement the image cartoonization process by normalizing the high-frequency signal of the noisy image at specific denoising steps. CartoonDiff does not require any additional reference images, complex model designs, or tedious adjustment of multiple parameters. Extensive experimental results show the powerful ability of our CartoonDiff. The project page is available at: https://cartoondiff.github.io/
Submitted 15 September, 2023; originally announced September 2023.
Comments: 5 pages, 5 figures

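The key operation here, normalizing the high-frequency signal of the noisy image at certain denoising steps, could plausibly look like the FFT-based sketch below. The cutoff radius and the unit-magnitude rescaling are assumptions for illustration, not the paper's published recipe.

```python
import torch

def normalize_high_freq(x_t: torch.Tensor, radius: int = 8) -> torch.Tensor:
    """Rescale the high-frequency FFT coefficients of a noisy image batch
    (B, C, H, W) to unit magnitude, keeping phase and low frequencies intact."""
    f = torch.fft.fftshift(torch.fft.fft2(x_t), dim=(-2, -1))
    *_, H, W = x_t.shape
    yy, xx = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    dist = ((yy - H // 2) ** 2 + (xx - W // 2) ** 2).float().sqrt().to(x_t.device)
    mag = f.abs()
    # Divide high-frequency coefficients by their magnitude; leave the rest alone.
    scale = torch.where(dist > radius, mag + 1e-8, torch.ones_like(mag))
    return torch.fft.ifft2(torch.fft.ifftshift(f / scale, dim=(-2, -1))).real
```
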
arXiv:2309.06682 [pdf, other] cs.RO
https://arxiv.org/abs/2309.06682
A Novel Low-Cost, Recyclable, Easy-to-Build Robot Blimp For Transporting Supplies in Hard-to-Reach Locations
Authors: Karen Li, Shuhang Hou, Matyas Negash, Jiawei Xu, Edward Jeffs, Diego S. D'Antonio, David Saldaña
Abstract: Rural communities in remote areas often encounter significant challenges when it comes to accessing emergency healthcare services and essential supplies, due to a lack of adequate transportation infrastructure. The situation is further exacerbated by poorly maintained, damaged, or flooded roads, making it arduous for rural residents to obtain necessary aid in critical situations. Limited budgets and technological constraints pose additional obstacles, hindering the prompt response of local rescue teams during emergencies. The transportation of crucial resources, such as medical supplies and food, plays a vital role in saving lives in these situations. In light of these obstacles, our objective is to improve accessibility and alleviate the suffering of vulnerable populations by automating transportation tasks using low-cost robotic systems. We propose a low-cost, easy-to-build robot blimp (UAV) that can significantly enhance the efficiency and effectiveness of local emergency responses.
Submitted 12 September, 2023; originally announced September 2023.
Comments: IEEE Global Humanitarian Technology Conference (GHTC 2023)

arXiv:2309.01196 [pdf, other] cs.CL cs.AI
https://arxiv.org/abs/2309.01196
A Visual Interpretation-Based Self-Improved Classification System Using Virtual Adversarial Training
Authors: Shuai Jiang, Sayaka Kamei, Chen Li, Shengzhe Hou, Yasuhiko Morimoto
Abstract: The successful application of large pre-trained models such as BERT in natural language processing has attracted growing attention from researchers. Since BERT typically acts as an end-to-end black box, classification systems based on it usually suffer from poor interpretability and low robustness. This paper proposes a visual interpretation-based self-improving classification model that combines virtual adversarial training (VAT) and BERT models to address the above problems. Specifically, a fine-tuned BERT model is used as a classifier to classify the sentiment of the text. Then, the predicted sentiment classification labels are used as part of the input of another BERT model for spam classification, trained in a semi-supervised manner using VAT.
Additionally, visualization techniques, including visualizing the importance of words and normalizing the attention head matrix, are employed to analyze the relevance of each component to classification accuracy. Moreover, new features can be found through the visual analysis, and classification performance can be improved. Experimental results on a Twitter tweet dataset demonstrate the effectiveness of the proposed model on the classification task. Furthermore, the ablation study results illustrate the effect of different components of the proposed model on the classification results.
Submitted 3 September, 2023; originally announced September 2023.

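The VAT component referenced above has a standard form: find the small input perturbation that most changes the model's output distribution, then penalize that change. A minimal sketch of the generic regularizer follows; model and x_embed are placeholders, and the paper's two-BERT pipeline is not reproduced here.

```python
import torch
import torch.nn.functional as F

def vat_loss(model, x_embed: torch.Tensor,
             xi: float = 1e-6, eps: float = 1.0, n_power: int = 1) -> torch.Tensor:
    """Virtual adversarial loss on input embeddings: estimate the most
    prediction-changing direction by power iteration, then penalize the
    KL divergence the resulting perturbation induces."""
    with torch.no_grad():
        p = F.softmax(model(x_embed), dim=-1)  # reference distribution
    d = torch.randn_like(x_embed)
    for _ in range(n_power):
        d = xi * F.normalize(d.flatten(1), dim=1).view_as(d)
        d.requires_grad_()
        p_hat = F.log_softmax(model(x_embed + d), dim=-1)
        adv_dist = F.kl_div(p_hat, p, reduction="batchmean")
        d = torch.autograd.grad(adv_dist, d)[0].detach()
    r_adv = eps * F.normalize(d.flatten(1), dim=1).view_as(d)
    p_hat = F.log_softmax(model(x_embed + r_adv), dim=-1)
    return F.kl_div(p_hat, p, reduction="batchmean")
```
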
476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>