Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 224 results for author: <span class="mathjax">Jin, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Jin%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Jin, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jin%2C+W&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jin, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jin%2C+W&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jin%2C+W&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jin%2C+W&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jin%2C+W&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jin%2C+W&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jin%2C+W&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15016">arXiv:2502.15016</a> <span> [<a href="https://arxiv.org/pdf/2502.15016">pdf</a>, <a href="https://arxiv.org/format/2502.15016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> TimeDistill: Efficient Long-Term Time Series Forecasting with MLP via Cross-Architecture Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ni%2C+J">Juntong Ni</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zewen Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Ming Jin</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15016v1-abstract-short" style="display: inline;"> Transformer-based and CNN-based methods demonstrate strong performance in long-term time series forecasting. However, their high computational and storage requirements can hinder large-scale deployment. To address this limitation, we propose integrating lightweight MLP with advanced architectures using knowledge distillation (KD). 
Our preliminary study reveals different models can capture compleme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15016v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15016v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15016v1-abstract-full" style="display: none;"> Transformer-based and CNN-based methods demonstrate strong performance in long-term time series forecasting. However, their high computational and storage requirements can hinder large-scale deployment. To address this limitation, we propose integrating lightweight MLP with advanced architectures using knowledge distillation (KD). Our preliminary study reveals different models can capture complementary patterns, particularly multi-scale and multi-period patterns in the temporal and frequency domains. Based on this observation, we introduce TimeDistill, a cross-architecture KD framework that transfers these patterns from teacher models (e.g., Transformers, CNNs) to MLP. Additionally, we provide a theoretical analysis, demonstrating that our KD approach can be interpreted as a specialized form of mixup data augmentation. TimeDistill improves MLP performance by up to 18.6%, surpassing teacher models on eight datasets. It also achieves up to 7X faster inference and requires 130X fewer parameters. Furthermore, we conduct extensive evaluations to highlight the versatility and effectiveness of TimeDistill. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15016v1-abstract-full').style.display = 'none'; document.getElementById('2502.15016v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08244">arXiv:2502.08244</a> <span> [<a href="https://arxiv.org/pdf/2502.08244">pdf</a>, <a href="https://arxiv.org/format/2502.08244">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wonjoon Jin</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+Q">Qi Dai</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+C">Chong Luo</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+S">Seung-Hwan Baek</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+S">Sunghyun Cho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08244v1-abstract-short" style="display: inline;"> This paper presents FloVD, a novel optical-flow-based video diffusion model for camera-controllable video generation. FloVD leverages optical flow maps to represent motions of the camera and moving objects. This approach offers two key benefits. 
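
The entry above describes cross-architecture distillation only at the abstract level. As a rough, hypothetical sketch of the general technique (not TimeDistill's actual architecture or loss, which also transfers multi-scale and multi-period patterns), a prediction-level KD step from a frozen teacher forecaster to an MLP student could look like this:

```python
# Hypothetical sketch of prediction-level knowledge distillation for
# long-term time series forecasting; the names and the alpha weighting
# are illustrative, not the TimeDistill implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPForecaster(nn.Module):
    """Lightweight student: maps a lookback window to a forecast horizon."""
    def __init__(self, lookback: int, horizon: int, hidden: int = 256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(lookback, hidden), nn.ReLU(), nn.Linear(hidden, horizon)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)  # x: (batch, lookback) -> (batch, horizon)

def distill_step(student, teacher, x, y, optimizer, alpha: float = 0.5):
    """One step: fit the ground truth and mimic the frozen teacher's forecast."""
    teacher.eval()
    with torch.no_grad():
        y_teacher = teacher(x)
    y_student = student(x)
    loss = (1 - alpha) * F.mse_loss(y_student, y) \
         + alpha * F.mse_loss(y_student, y_teacher)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```

Blending the two targets this way also makes the mixup interpretation mentioned in the abstract plausible: the pointwise minimizer of the combined loss is the convex combination (1 - alpha) * y + alpha * y_teacher.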

2. arXiv:2502.08244 [pdf, other] (cs.CV)
FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis
Authors: Wonjoon Jin, Qi Dai, Chong Luo, Seung-Hwan Baek, Sunghyun Cho
Abstract: This paper presents FloVD, a novel optical-flow-based video diffusion model for camera-controllable video generation. FloVD leverages optical flow maps to represent motions of the camera and moving objects. This approach offers two key benefits. Since optical flow can be directly estimated from videos, our approach allows for the use of arbitrary training videos without ground-truth camera parameters. Moreover, as background optical flow encodes 3D correlation across different viewpoints, our method enables detailed camera control by leveraging the background motion. To synthesize natural object motion while supporting detailed camera control, our framework adopts a two-stage video synthesis pipeline consisting of optical flow generation and flow-conditioned video synthesis. Extensive experiments demonstrate the superiority of our method over previous approaches in terms of accurate camera control and natural object motion synthesis.
Submitted 12 February, 2025; originally announced February 2025.
Comments: Project website: https://jinwonjoon.github.io/flovd_site/
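
A useful reference point for the pipeline above: flow-conditioned synthesis builds on warping images by a flow field. The helper below is that generic, textbook primitive in PyTorch; it is illustrative only, and FloVD's diffusion-based pipeline is far more than this.

```python
# Generic backward warping of an image by a dense optical-flow field,
# the basic primitive flow-conditioned video synthesis builds on.
# Illustrative only; this is not FloVD's code.
import torch
import torch.nn.functional as F

def warp_by_flow(img: torch.Tensor, flow: torch.Tensor) -> torch.Tensor:
    """img: (B, C, H, W); flow: (B, 2, H, W) in pixels. Samples img at p + flow(p)."""
    b, _, h, w = img.shape
    ys, xs = torch.meshgrid(
        torch.arange(h, dtype=img.dtype, device=img.device),
        torch.arange(w, dtype=img.dtype, device=img.device),
        indexing="ij",
    )
    grid_x = xs.unsqueeze(0) + flow[:, 0]  # where each output pixel reads from, x
    grid_y = ys.unsqueeze(0) + flow[:, 1]  # where each output pixel reads from, y
    grid = torch.stack(  # normalize to [-1, 1] in (x, y) order for grid_sample
        (2.0 * grid_x / (w - 1) - 1.0, 2.0 * grid_y / (h - 1) - 1.0), dim=-1
    )
    return F.grid_sample(img, grid, align_corners=True)
```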

3. arXiv:2502.03393 [pdf, other] (cs.LG)
CAPE: Covariate-Adjusted Pre-Training for Epidemic Time Series Forecasting
Authors: Zewen Liu, Juntong Ni, Max S. Y. Lau, Wei Jin
Abstract: Accurate forecasting of epidemic infection trajectories is crucial for safeguarding public health. However, limited data availability during emerging outbreaks and the complex interaction between environmental factors and disease dynamics present significant challenges for effective forecasting. In response, we introduce CAPE, a novel epidemic pre-training framework designed to harness extensive disease datasets from diverse regions and integrate environmental factors directly into the modeling process for more informed decision-making on downstream diseases. Based on a covariate adjustment framework, CAPE utilizes pre-training combined with hierarchical environment contrasting to identify universal patterns across diseases while estimating latent environmental influences. We have compiled a diverse collection of epidemic time series datasets and validated the effectiveness of CAPE under various evaluation scenarios, including full-shot, few-shot, zero-shot, cross-location, and cross-disease settings, where it outperforms the leading baseline by an average of 9.9% in full-shot and 14.3% in zero-shot settings. The code will be released upon acceptance.
Submitted 5 February, 2025; originally announced February 2025.
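
The "hierarchical environment contrasting" above is described only at a high level. One plausible single-level form is a supervised, InfoNCE-style loss that pulls together window embeddings from the same environment; everything in this sketch, from the loss form to the env_ids labels and the temperature, is an assumption rather than CAPE's actual objective.

```python
# One plausible shape for an environment-contrastive objective
# (supervised InfoNCE); assumed for illustration, not CAPE's loss.
import torch
import torch.nn.functional as F

def env_contrastive_loss(z: torch.Tensor, env_ids: torch.Tensor, tau: float = 0.1):
    """z: (N, d) window embeddings; env_ids: (N,) integer environment labels."""
    z = F.normalize(z, dim=-1)
    sim = z @ z.t() / tau                                    # cosine similarities
    self_mask = torch.eye(len(z), dtype=torch.bool, device=z.device)
    sim = sim.masked_fill(self_mask, float("-inf"))          # drop self-pairs
    log_prob = sim - torch.logsumexp(sim, dim=1, keepdim=True)
    pos = (env_ids[:, None] == env_ids[None, :]) & ~self_mask
    # Average log-probability of same-environment pairs per anchor;
    # anchors with no positive simply contribute zero.
    return -(log_prob.masked_fill(~pos, 0.0).sum(1) / pos.sum(1).clamp(min=1)).mean()
```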

4. arXiv:2502.02921 [pdf, other] (cs.LG)
Robust Reward Alignment via Hypothesis Space Batch Cutting
Authors: Zhixian Xie, Haode Zhang, Yizhe Feng, Wanxin Jin
Abstract: Reward design for reinforcement learning and optimal control agents is challenging. Preference-based alignment addresses this by enabling agents to learn rewards from ranked trajectory pairs provided by humans. However, existing methods often suffer from poor robustness to unknown false human preferences. In this work, we propose a robust and efficient reward alignment method based on a novel and geometrically interpretable perspective: hypothesis space batched cutting. Our method iteratively refines the reward hypothesis space through "cuts" based on batches of human preferences. Within each batch, human preferences, queried based on disagreement, are grouped using a voting function to determine the appropriate cut, ensuring a bounded human query complexity. To handle unknown erroneous preferences, we introduce a conservative cutting method within each batch, preventing erroneous human preferences from making overly aggressive cuts to the hypothesis space. This guarantees provable robustness against false preferences. We evaluate our method in a model predictive control setting across diverse tasks, including DM-Control, dexterous in-hand manipulation, and locomotion. The results demonstrate that our framework achieves comparable or superior performance to state-of-the-art methods in error-free settings while significantly outperforming existing methods when handling a high percentage of erroneous human preferences.
Submitted 6 February, 2025; v1 submitted 5 February, 2025; originally announced February 2025.
Comments: 17 pages, including appendix

5. arXiv:2501.07496 [pdf, other] (cs.CV)
Aligning First, Then Fusing: A Novel Weakly Supervised Multimodal Violence Detection Method
Authors: Wenping Jin, Li Zhu, Jing Sun
Abstract: Weakly supervised violence detection refers to the technique of training models to identify violent segments in videos using only video-level labels. Among these approaches, multimodal violence detection, which integrates modalities such as audio and optical flow, holds great potential. Existing methods in this domain primarily focus on designing multimodal fusion models to address modality discrepancies. In contrast, we take a different approach, leveraging the inherent discrepancies across modalities in violence event representation to propose a novel multimodal semantic feature alignment method. This method sparsely maps the semantic features of local, transient, and less informative modalities (such as audio and optical flow) into the more informative RGB semantic feature space. Through an iterative process, the method identifies a suitable non-zero feature-matching subspace and aligns the modality-specific event representations based on this subspace, enabling the full exploitation of information from all modalities during the subsequent modality fusion stage. Building on this, we design a new weakly supervised violence detection framework that consists of unimodal multiple-instance learning for extracting unimodal semantic features, multimodal alignment, multimodal fusion, and final detection. Experimental results on benchmark datasets demonstrate the effectiveness of our method, achieving an average precision (AP) of 86.07% on the XD-Violence dataset. Our code is available at https://github.com/xjpp2016/MAVD.
Submitted 13 January, 2025; originally announced January 2025.

6. arXiv:2412.18239 [pdf, other] (physics.ao-ph, cs.LG)
OMG-HD: A High-Resolution AI Weather Model for End-to-End Forecasts from Observations
Authors: Pengcheng Zhao, Jiang Bian, Zekun Ni, Weixin Jin, Jonathan Weyn, Zuliang Fang, Siqi Xiang, Haiyu Dong, Bin Zhang, Hongyu Sun, Kit Thambiratnam, Qi Zhang
Abstract: In recent years, Artificial Intelligence Weather Prediction (AIWP) models have achieved performance comparable to, or even surpassing, traditional Numerical Weather Prediction (NWP) models by leveraging reanalysis data. However, a less-explored approach involves training AIWP models directly on observational data, enhancing computational efficiency and improving forecast accuracy by reducing the uncertainties introduced through data assimilation processes. In this study, we propose OMG-HD, a novel AI-based regional high-resolution weather forecasting model designed to make predictions directly from observational data sources, including surface stations, radar, and satellite, thereby removing the need for operational data assimilation. Our evaluation shows that OMG-HD outperforms both the European Centre for Medium-Range Weather Forecasts (ECMWF)'s high-resolution operational forecasting system, IFS-HRES, and the High-Resolution Rapid Refresh (HRRR) model at lead times of up to 12 hours across the contiguous United States (CONUS) region. We achieve up to a 13% improvement on RMSE for 2-meter temperature, 17% on 10-meter wind speed, 48% on 2-meter specific humidity, and 32% on surface pressure compared to HRRR. Our method shows that it is possible to use AI-driven approaches for rapid weather predictions without relying on NWP-derived weather fields as model input. This is a promising step towards using observational data directly to make operational forecasts with AIWP models.
Submitted 24 December, 2024; originally announced December 2024.

7. arXiv:2412.10981 [pdf, other] (cs.CY, cs.AI, cs.HC, cs.LG)
Hybrid Forecasting of Geopolitical Events
Authors: Daniel M. Benjamin, Fred Morstatter, Ali E. Abbas, Andres Abeliuk, Pavel Atanasov, Stephen Bennett, Andreas Beger, Saurabh Birari, David V. Budescu, Michele Catasta, Emilio Ferrara, Lucas Haravitch, Mark Himmelstein, KSM Tozammel Hossain, Yuzhong Huang, Woojeong Jin, Regina Joseph, Jure Leskovec, Akira Matsui, Mehrnoosh Mirtaheri, Xiang Ren, Gleb Satyukov, Rajiv Sethi, Amandeep Singh, Rok Sosic, et al. (4 additional authors not shown)
Abstract: Sound decision-making relies on accurate prediction for tangible outcomes ranging from military conflict to disease outbreaks. To improve crowdsourced forecasting accuracy, we developed SAGE, a hybrid forecasting system that combines human and machine generated forecasts. The system provides a platform where users can interact with machine models and thus anchor their judgments on an objective benchmark. The system also aggregates human and machine forecasts, weighting both for propinquity and based on assessed skill while adjusting for overconfidence. We present results from the Hybrid Forecasting Competition (HFC), larger than comparable forecasting tournaments, including 1085 users forecasting 398 real-world forecasting problems over eight months. Our main result is that the hybrid system generated more accurate forecasts compared to a human-only baseline which had no machine generated predictions. We found that skilled forecasters who had access to machine-generated forecasts outperformed those who only viewed historical data. We also demonstrated that the inclusion of machine-generated forecasts in our aggregation algorithms improved performance, both in terms of accuracy and scalability. This suggests that hybrid forecasting systems, which potentially require fewer human resources, can be a viable approach for maintaining a competitive level of accuracy over a larger number of forecasting questions.
Submitted 14 December, 2024; originally announced December 2024.
Comments: 20 pages, 6 figures, 4 tables
Journal ref: AI Magazine, Volume 44, Issue 1, Pages 112-128, Spring 2023
DOI: 10.1002/aaai.12085
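
The aggregation step ("weighting both for propinquity and based on assessed skill while adjusting for overconfidence") admits a simple generic recipe: decay each forecast by age, weight by skill, pool, then recalibrate the pooled probability. The sketch below is that generic recipe with made-up parameters, not SAGE's published aggregator.

```python
# A generic recipe for pooling probability forecasts: decay by age
# ("propinquity"), weight by assessed skill, average, then recalibrate.
# Parameters and functional form are assumptions, not SAGE's aggregator.
import numpy as np

def aggregate(probs, skill, age_days, half_life=10.0, a=2.0):
    """probs: each forecaster's probability for one binary question."""
    w = np.asarray(skill) * 0.5 ** (np.asarray(age_days) / half_life)
    p = np.average(np.asarray(probs), weights=w)
    return p**a / (p**a + (1 - p)**a)  # a > 1 extremizes, a < 1 tempers

print(aggregate([0.60, 0.72, 0.55], skill=[1.0, 2.0, 1.5], age_days=[1, 3, 8]))
```

The recalibration step exists because simple averaging tends to leave the pooled forecast underconfident; whether to extremize (a > 1) or temper (a < 1) depends on how overconfident the individual forecasters are.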

8. arXiv:2412.09799 [pdf, other] (cs.CV, cs.AI)
CP-DETR: Concept Prompt Guide DETR Toward Stronger Universal Object Detection
Authors: Qibo Chen, Weizhong Jin, Jianyue Ge, Mengdi Liu, Yuchao Yan, Jian Jiang, Li Yu, Xuanjiang Guo, Shuchang Li, Jianzhong Chen
Abstract: Recent research on universal object detection aims to introduce language in a SoTA closed-set detector and then generalize the open-set concepts by constructing large-scale (text-region) datasets for training. However, these methods face two main challenges: (i) how to efficiently use the prior information in the prompts to genericise objects and (ii) how to reduce alignment bias in the downstream tasks, both leading to sub-optimal performance in some scenarios beyond pre-training. To address these challenges, we propose a strong universal detection foundation model called CP-DETR, which is competitive in almost all scenarios, with only one pre-training weight. Specifically, we design an efficient prompt visual hybrid encoder that enhances the information interaction between prompt and visual through scale-by-scale and multi-scale fusion modules. Then, the hybrid encoder is facilitated to fully utilize the prompted information by prompt multi-label loss and auxiliary detection head. In addition to text prompts, we have designed two practical concept prompt generation methods, visual prompt and optimized prompt, to extract abstract concepts through concrete visual examples and stably reduce alignment bias in downstream tasks. With these effective designs, CP-DETR demonstrates superior universal detection performance in a broad spectrum of scenarios. For example, our Swin-T backbone model achieves 47.6 zero-shot AP on LVIS, and the Swin-L backbone model achieves 32.2 zero-shot AP on ODinW35. Furthermore, our visual prompt generation method achieves 68.4 AP on COCO val by interactive detection, and the optimized prompt achieves 73.1 fully-shot AP on ODinW13.
Submitted 12 December, 2024; originally announced December 2024.
Comments: Accepted by AAAI2025

9. arXiv:2412.01136 [pdf, other] (cs.CV)
Referring Video Object Segmentation via Language-aligned Track Selection
Authors: Seongchan Kim, Woojeong Jin, Sangbeom Lim, Heeji Yoon, Hyunwook Choi, Seungryong Kim
Abstract: Referring Video Object Segmentation (RVOS) seeks to segment objects throughout a video based on natural language expressions. While existing methods have made strides in vision-language alignment, they often overlook the importance of robust video object tracking, where inconsistent mask tracks can disrupt vision-language alignment, leading to suboptimal performance. In this work, we present Selection by Object Language Alignment (SOLA), a novel framework that reformulates RVOS into two sub-problems, track generation and track selection. In track generation, we leverage a vision foundation model, Segment Anything Model 2 (SAM2), which generates consistent mask tracks across frames, producing reliable candidates for both foreground and background objects. For track selection, we propose a light yet effective selection module that aligns visual and textual features while modeling object appearance and motion within video sequences. This design enables precise motion modeling and vision-language alignment. Our approach achieves state-of-the-art performance on the challenging MeViS dataset and demonstrates superior results in zero-shot settings on the Ref-Youtube-VOS and Ref-DAVIS datasets. Furthermore, SOLA exhibits strong generalization and robustness in corrupted settings, such as those with added Gaussian noise or motion blur. Our project page is available at https://cvlab-kaist.github.io/SOLA
Submitted 2 December, 2024; originally announced December 2024.
Comments: Project page is available at https://cvlab-kaist.github.io/SOLA
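
At its simplest, track selection reduces to scoring each candidate mask track against the language query and keeping the best match. The sketch below shows that minimal version; the pooling and similarity here are placeholders for SOLA's learned selection module, which also models appearance and motion.

```python
# Minimal sketch of the "track selection" idea: score candidate mask tracks
# against the language query and keep the best-aligned one. The embeddings
# and mean-pooling are placeholders, not SOLA's module.
import torch
import torch.nn.functional as F

def select_track(track_feats: torch.Tensor, text_feat: torch.Tensor) -> int:
    """track_feats: (num_tracks, num_frames, d) visual features per candidate
    track (e.g., pooled from SAM2 masks); text_feat: (d,) query embedding."""
    track_emb = F.normalize(track_feats.mean(dim=1), dim=-1)  # temporal mean-pool
    text_emb = F.normalize(text_feat, dim=-1)
    scores = track_emb @ text_emb              # cosine similarity per track
    return int(scores.argmax())
```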

10. arXiv:2411.16807 [pdf, other] (physics.ao-ph, cs.AI, cs.LG)
ADAF: An Artificial Intelligence Data Assimilation Framework for Weather Forecasting
Authors: Yanfei Xiang, Weixin Jin, Haiyu Dong, Mingliang Bai, Zuliang Fang, Pengcheng Zhao, Hongyu Sun, Kit Thambiratnam, Qi Zhang, Xiaomeng Huang
Abstract: The forecasting skill of numerical weather prediction (NWP) models critically depends on accurate initial conditions, also known as analysis, provided by data assimilation (DA). Traditional DA methods often face a trade-off between computational cost and accuracy due to complex linear algebra computations and the high dimensionality of the model, especially in nonlinear systems. Moreover, processing massive data in real time requires substantial computational resources. To address this, we introduce an artificial intelligence-based data assimilation framework (ADAF) to generate high-quality kilometer-scale analysis. This study is the pioneering work using real-world observations from varied locations and multiple sources to verify the AI method's efficacy in DA, including sparse surface weather observations and satellite imagery. We implemented ADAF for four near-surface variables in the Contiguous United States (CONUS). The results indicate that ADAF surpasses the High Resolution Rapid Refresh Data Assimilation System (HRRRDAS) in accuracy by 16% to 33% for near-surface atmospheric conditions, aligning more closely with actual observations, and can effectively reconstruct extreme events, such as tropical cyclone wind fields. Sensitivity experiments reveal that ADAF can generate high-quality analysis even with low-accuracy backgrounds and extremely sparse surface observations. ADAF can assimilate massive observations within a three-hour window at low computational cost, taking about two seconds on an AMD MI200 graphics processing unit (GPU). ADAF has been shown to be efficient and effective in real-world DA, underscoring its potential role in operational weather forecasting.
Submitted 25 November, 2024; originally announced November 2024.
Comments: 29 pages, 15 figures
\textsc{GENzyme} is an end-to-end, three-stage model that integrates (1) a catalytic pocket generation and sequence co-design module, (2) a pocket inpainting and enzyme inverse folding module, and (3) a binding and screening module to optimize and predict enzyme-substrate complexes. The entire design process is driven by the targeted catalytic reaction. This reaction-first approach allows for more accurate and biologically relevant enzyme design, potentially surpassing structure-based and binding-focused models in creating enzymes capable of catalyzing specific reactions. We provide \textsc{GENzyme} code at https://github.com/WillHua127/GENzyme. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
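<p class="is-size-7 has-text-grey-dark">An illustrative aside: the three-stage, reaction-first flow described above can be pictured as a pipeline. Every function in the sketch below is a toy placeholder (random strings and scores), not GENzyme's actual interface; only the staging mirrors the abstract.</p>
<pre><code class="language-python">
# Toy reaction-first design pipeline: pocket generation, inverse folding, screening.
import random

random.seed(0)
AMINO = "ACDEFGHIKLMNPQRSTVWY"

def generate_pocket(reaction: str, size: int = 8) -> str:
    """Stage 1 stand-in: 'co-design' a catalytic pocket conditioned on the reaction."""
    rng = random.Random(hash(reaction) % (2**32))
    return "".join(rng.choice(AMINO) for _ in range(size))

def inverse_fold(pocket: str, length: int = 40) -> str:
    """Stage 2 stand-in: inpaint around the pocket into a full enzyme sequence."""
    pad = length - len(pocket)
    left = "".join(random.choice(AMINO) for _ in range(pad // 2))
    right = "".join(random.choice(AMINO) for _ in range(pad - pad // 2))
    return left + pocket + right

def binding_score(sequence: str, reaction: str) -> float:
    """Stage 3 stand-in: score the enzyme-substrate complex (random here)."""
    return random.random()

def design_enzyme(reaction: str, n_candidates: int = 16) -> str:
    pocket = generate_pocket(reaction)
    candidates = [inverse_fold(pocket) for _ in range(n_candidates)]
    return max(candidates, key=lambda s: binding_score(s, reaction))

print(design_enzyme("ester hydrolysis"))
</code></pre>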
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09935">arXiv:2411.09935</a> <span> [<a href="https://arxiv.org/pdf/2411.09935">pdf</a>, <a href="https://arxiv.org/format/2411.09935">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Whole-Body Impedance Coordinative Control of Wheel-Legged Robot on Uncertain Terrain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+L">Lei Shi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xinghua Yu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Cheng Zhou</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wanxin Jin</a>, <a href="/search/cs?searchtype=author&query=Chi%2C+W">Wanchao Chi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shenghao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dongsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiong Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhengyou Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.09935v1-abstract-full"> This article proposes a whole-body impedance coordinative control framework for a wheel-legged humanoid robot to achieve adaptability on complex terrains while maintaining upper-body stability. The framework contains a bi-level control strategy. The outer level is a variable-damping impedance controller, which optimizes the damping parameters to ensure the stability of the upper body while holding an object. The inner level employs Whole-Body Control (WBC) optimization that integrates real-time terrain estimation based on wheel-foot position and force data. It generates motor torques while accounting for dynamic constraints, joint limits, friction cones, real-time terrain updates, and a model-free friction compensation strategy. The proposed whole-body coordinative control method has been tested on a recently developed quadruped humanoid robot. The results demonstrate that the proposed algorithm effectively controls the robot, maintaining upper-body stability to successfully complete a water-carrying task while adapting to varying terrains. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
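<p class="is-size-7 has-text-grey-dark">An illustrative aside: the outer level described above is a variable-damping impedance law. The toy below integrates a 1-D second-order impedance and adapts its damping online; the gains and the adaptation rule are invented and are not the paper's optimization.</p>
<pre><code class="language-python">
# Toy variable-damping impedance loop under a terrain-like disturbance.
import numpy as np

def impedance_step(x, v, x_des, k=50.0, d=10.0, m=1.0, dt=0.01, f_ext=0.0):
    """One Euler step of the impedance m*a = k*(x_des - x) - d*v + f_ext."""
    a = (k * (x_des - x) - d * v + f_ext) / m
    return x + dt * v, v + dt * a

x, v = 0.0, 0.0
for t in range(500):
    f_ext = 5.0 * np.sin(0.05 * t)           # terrain-induced disturbance (toy)
    # "Variable damping": raise damping when velocity is high, to keep the
    # upper body (and a carried object) steady despite the disturbance.
    d = 10.0 + 40.0 * min(abs(v), 1.0)
    x, v = impedance_step(x, v, x_des=1.0, d=d, f_ext=f_ext)
print(f"final position {x:.3f}, final velocity {v:.3f}")
</code></pre>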
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12583">arXiv:2410.12583</a> <span> [<a href="https://arxiv.org/pdf/2410.12583">pdf</a>, <a href="https://arxiv.org/format/2410.12583">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> STRUX: An LLM for Decision-Making with Structured Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yiming Lu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yebowen Hu</a>, <a href="/search/cs?searchtype=author&query=Foroosh%2C+H">Hassan Foroosh</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.12583v1-abstract-full"> Countless decisions shape our daily lives, and it is paramount to understand the how and why behind these choices. In this paper, we introduce a new LLM decision-making framework called STRUX, which enhances LLM decision-making by providing structured explanations. These include favorable and adverse facts related to the decision, along with their respective strengths. STRUX begins by distilling lengthy information into a concise table of key facts. It then employs a series of self-reflection steps to determine which of these facts are pivotal, categorizing them as either favorable or adverse in relation to a specific decision. Lastly, we fine-tune an LLM to identify and prioritize these key facts to optimize decision-making. STRUX has been evaluated on the challenging task of forecasting stock investment decisions based on earnings call transcripts and demonstrated superior performance against strong baselines. It enhances decision transparency by allowing users to understand the impact of different factors, representing a meaningful step towards practical decision-making with LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 7 figures, submitted to NAACL 2025</span> </p>
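<p class="is-size-7 has-text-grey-dark">An illustrative aside: the structured explanation described above amounts to a table of key facts, each labeled favorable or adverse with a strength. The sketch below shows that data structure with a trivial aggregate decision rule standing in for the paper's LLM-based self-reflection and fine-tuning.</p>
<pre><code class="language-python">
# Toy "structured explanation": a fact table plus a transparent decision rule.
from dataclasses import dataclass

@dataclass
class Fact:
    text: str
    favorable: bool   # direction with respect to the decision
    strength: float   # 0..1, how pivotal the fact is

def decide(facts):
    score = sum(f.strength if f.favorable else -f.strength for f in facts)
    verdict = "invest" if score > 0 else "pass"
    table = "\n".join(
        f"{'+' if f.favorable else '-'} ({f.strength:.1f}) {f.text}" for f in facts
    )
    return verdict, table

facts = [
    Fact("revenue grew 12% year over year", True, 0.8),
    Fact("guidance cut for next quarter", False, 0.9),
    Fact("new product line announced", True, 0.4),
]
verdict, table = decide(facts)
print(table)
print("decision:", verdict)
</code></pre>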
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09543">arXiv:2410.09543</a> <span> [<a href="https://arxiv.org/pdf/2410.09543">pdf</a>, <a href="https://arxiv.org/format/2410.09543">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Boltzmann-Aligned Inverse Folding Model as a Predictor of Mutational Effects on Protein-Protein Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiao%2C+X">Xiaoran Jiao</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+W">Weian Mao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wengong Jin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peiyuan Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Chunhua Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.09543v1-abstract-full"> Predicting the change in binding free energy ($ΔΔG$) is crucial for understanding and modulating protein-protein interactions, which are critical in drug design. Due to the scarcity of experimental $ΔΔG$ data, existing methods focus on pre-training, while neglecting the importance of alignment. In this work, we propose the Boltzmann Alignment technique to transfer knowledge from pre-trained inverse folding models to $ΔΔG$ prediction. We begin by analyzing the thermodynamic definition of $ΔΔG$ and introducing the Boltzmann distribution to connect energy with the protein conformational distribution. However, the protein conformational distribution is intractable; therefore, we employ Bayes' theorem to circumvent direct estimation and instead utilize the log-likelihood provided by protein inverse folding models for $ΔΔG$ estimation. Compared to previous inverse-folding-based methods, our method explicitly accounts for the unbound state of the protein complex in the $ΔΔG$ thermodynamic cycle, introducing a physical inductive bias and achieving both supervised and unsupervised state-of-the-art (SoTA) performance. Experimental results on SKEMPI v2 indicate that our method achieves Spearman coefficients of 0.3201 (unsupervised) and 0.5134 (supervised), significantly surpassing the previously reported SoTA values of 0.2632 and 0.4324, respectively. Furthermore, we demonstrate the capability of our method on binding energy prediction, protein-protein docking, and antibody optimization tasks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
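<p class="is-size-7 has-text-grey-dark">An illustrative aside: the estimator described above plugs inverse-folding log-likelihoods into the ΔΔG thermodynamic cycle, including the unbound state. The sketch below shows the standard double difference with a fake likelihood function (toy_ll); the sign convention and the stand-in model are assumptions, not the paper's exact estimator.</p>
<pre><code class="language-python">
# Double-difference ddG surrogate from (fake) inverse-folding log-likelihoods.
def ddg_estimate(log_likelihood, wt_seq, mut_seq, bound_ctx, unbound_ctx):
    # dG(seq) surrogate: -[logp(seq | bound) - logp(seq | unbound)]  (assumed sign)
    dg_wt = -(log_likelihood(wt_seq, bound_ctx) - log_likelihood(wt_seq, unbound_ctx))
    dg_mut = -(log_likelihood(mut_seq, bound_ctx) - log_likelihood(mut_seq, unbound_ctx))
    return dg_mut - dg_wt     # ddG = dG(mutant) - dG(wild type)

def toy_ll(seq, ctx):
    # fake "inverse folding" likelihood: rewards sequence/context agreement
    return -sum(abs(ord(a) - ord(b)) for a, b in zip(seq, ctx)) / len(seq)

print(ddg_estimate(toy_ll, wt_seq="ACDE", mut_seq="ACDF",
                   bound_ctx="ACDE", unbound_ctx="AAAA"))
</code></pre>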
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09286">arXiv:2410.09286</a> <span> [<a href="https://arxiv.org/pdf/2410.09286">pdf</a>, <a href="https://arxiv.org/format/2410.09286">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Language-Model-Assisted Bi-Level Programming for Reward Learning from Internet Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mahesheka%2C+H">Harsh Mahesheka</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhixian Xie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhaoran Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wanxin Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.09286v1-abstract-full"> Learning from Demonstrations, particularly from biological experts like humans and animals, often encounters significant data acquisition challenges. While recent approaches leverage internet videos for learning, they require complex, task-specific pipelines to extract and retarget motion data for the agent. In this work, we introduce a language-model-assisted bi-level programming framework that enables a reinforcement learning agent to learn its reward directly from internet videos, bypassing dedicated data preparation. The framework includes two levels: an upper level where a vision-language model (VLM) provides feedback by comparing the learner's behavior with expert videos, and a lower level where a large language model (LLM) translates this feedback into reward updates. The VLM and LLM collaborate within this bi-level framework, using a "chain rule" approach to derive a valid search direction for reward learning. We validate the method for reward learning from YouTube videos; the results show that the proposed method enables efficient reward design from expert videos of biological agents for complex behavior synthesis. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
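<p class="is-size-7 has-text-grey-dark">An illustrative aside: the control flow described above is an upper level that compares learner behavior with expert video and a lower level that turns the feedback into a reward-parameter update. Both models in the sketch below are trivial numerical stand-ins (no VLM or LLM is called); only the loop structure mirrors the abstract.</p>
<pre><code class="language-python">
# Toy bi-level reward-learning loop with numerical stand-ins for the VLM and LLM.
import numpy as np

rng = np.random.default_rng(0)
expert_feature = np.array([1.0, 0.5])    # e.g., gait statistics from video (toy)
theta = np.zeros(2)                      # reward parameters to learn

def rollout_feature(theta):
    """Lower-level RL stand-in: behavior feature moves toward the reward optimum."""
    return theta + 0.05 * rng.standard_normal(2)

for step in range(200):
    feat = rollout_feature(theta)
    feedback = expert_feature - feat     # "VLM": compare learner vs expert
    theta += 0.1 * feedback              # "LLM": translate feedback into an update
print("learned reward parameters:", np.round(theta, 2))
</code></pre>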
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06373">arXiv:2410.06373</a> <span> [<a href="https://arxiv.org/pdf/2410.06373">pdf</a>, <a href="https://arxiv.org/format/2410.06373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Unveiling the Backbone-Optimizer Coupling Bias in Visual Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+S">Siyuan Li</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+J">Juanxi Tian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zedong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zicheng Liu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weiyang Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+B">Baigui Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+Z">Stan Z. Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.06373v1-abstract-full"> This paper delves into the interplay between vision backbones and optimizers, unveiling an inter-dependent phenomenon termed \textit{\textbf{b}ackbone-\textbf{o}ptimizer \textbf{c}oupling \textbf{b}ias} (BOCB). We observe that canonical CNNs, such as VGG and ResNet, exhibit a marked co-dependency with SGD families, while recent architectures like ViTs and ConvNeXt share a tight coupling with adaptive learning-rate optimizers. We further show that BOCB can be introduced by both optimizers and certain backbone designs and may significantly impact the pre-training and downstream fine-tuning of vision models. Through in-depth empirical analysis, we summarize takeaways on recommended optimizers and insights into robust vision backbone architectures. We hope this work can inspire the community to question long-held assumptions on backbones and optimizers, stimulate further explorations, and thereby contribute to more robust vision systems. The source code and models are publicly available at https://bocb-ai.github.io/. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint V1. Online project at https://bocb-ai.github.io/</span> </p>
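<p class="is-size-7 has-text-grey-dark">An illustrative aside: a coupling study like the one above can be organized as a full backbone-by-optimizer sweep, recording a score per cell and a per-backbone spread. The sketch below shows that protocol with a dummy train-and-eval function; the score and the spread statistic are invented for illustration.</p>
<pre><code class="language-python">
# Toy backbone x optimizer sweep; a large per-backbone spread suggests coupling.
import itertools

backbones = ["vgg", "resnet", "vit", "convnext"]
optimizers = ["sgd", "adamw", "lamb"]

def train_and_eval(backbone: str, optimizer: str) -> float:
    """Stand-in for a full pre-training + fine-tuning run."""
    return float(len(backbone) * len(optimizer) % 7) / 7.0   # dummy score

results = {
    (b, o): train_and_eval(b, o)
    for b, o in itertools.product(backbones, optimizers)
}
for b in backbones:
    scores = [results[(b, o)] for o in optimizers]
    print(f"{b:9s} spread across optimizers: {max(scores) - min(scores):.2f}")
</code></pre>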
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06340">arXiv:2410.06340</a> <span> [<a href="https://arxiv.org/pdf/2410.06340">pdf</a>, <a href="https://arxiv.org/format/2410.06340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FedGraph: A Research Library and Benchmark for Federated Graph Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuhang Yao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xinyi Fan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Junhao Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kay Liu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weizhao Jin</a>, <a href="/search/cs?searchtype=author&query=Ravi%2C+S">Srivatsan Ravi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P+S">Philip S. Yu</a>, <a href="/search/cs?searchtype=author&query=Joe-Wong%2C+C">Carlee Joe-Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.06340v2-abstract-full"> Federated graph learning is an emerging field with significant practical challenges. While many algorithms have been proposed to enhance the accuracy of training graph neural networks in a federated manner, e.g., for node classification problems on large graphs, their system performance is often overlooked, even though it is crucial for real-world deployment. To address this gap, we introduce FedGraph, a research library built for practical distributed deployment and benchmarking in federated graph learning. FedGraph supports a range of state-of-the-art graph learning methods and includes built-in profiling tools to evaluate system performance, focusing specifically on communication and computation costs during training. Unlike existing benchmark platforms, FedGraph natively incorporates homomorphic encryption to enhance privacy preservation and facilitates the development of practical applications by enabling distributed training across multiple physical machines, providing an evaluation framework that can guide the system design of future federated graph learning algorithms. Leveraging these optimizations, we use FedGraph to demonstrate the first privacy-preserving federated learning system to run on graphs with 100 million nodes. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://github.com/FedGraph/fedgraph</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00049">arXiv:2410.00049</a> <span> [<a href="https://arxiv.org/pdf/2410.00049">pdf</a>, <a href="https://arxiv.org/format/2410.00049">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Epidemiology-Aware Neural ODE with Continuous Disease Transmission Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+G">Guancheng Wan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zewen Liu</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+M+S+Y">Max S. Y. Lau</a>, <a href="/search/cs?searchtype=author&query=Prakash%2C+B+A">B. Aditya Prakash</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.00049v2-abstract-full"> Effective epidemic forecasting is critical for public health strategies and efficient medical resource allocation, especially in the face of rapidly spreading infectious diseases. However, existing deep-learning methods often overlook the dynamic nature of epidemics and fail to account for the specific mechanisms of disease transmission. In response to these challenges, we introduce an innovative end-to-end framework called Epidemiology-Aware Neural ODE with Continuous Disease Transmission Graph (EARTH) in this paper. To learn continuous and regional disease transmission patterns, we first propose EANO, which seamlessly integrates the neural ODE approach with the epidemic mechanism, considering the complex spatial spread process during epidemic evolution. Additionally, we introduce GLTG to model global infection trends and leverage these signals to guide local transmission dynamically. To accommodate both the global coherence of epidemic trends and the local nuances of epidemic transmission patterns, we build a cross-attention approach to fuse the most meaningful information for forecasting.
Through the smooth synergy of both components, EARTH offers a more robust and flexible approach to understanding and predicting the spread of infectious diseases. Extensive experiments show EARTH's superior performance in forecasting real-world epidemics compared to state-of-the-art methods. The code will be available at https://github.com/Emory-Melody/EpiLearn. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
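<p class="is-size-7 has-text-grey-dark">An illustrative aside: an epidemiology-aware neural ODE can be read as a mechanistic SIR vector field plus a learned residual. The sketch below integrates exactly that with a fixed random MLP as the residual; it is not EARTH's EANO/GLTG design, and all sizes and rates are invented.</p>
<pre><code class="language-python">
# Toy "mechanism + learned residual" ODE, integrated with Euler steps.
import numpy as np

rng = np.random.default_rng(0)
W1 = 0.1 * rng.standard_normal((8, 3))
W2 = 0.1 * rng.standard_normal((3, 8))

def correction(state):
    """Tiny fixed MLP standing in for the learned residual dynamics."""
    return W2 @ np.tanh(W1 @ state)

def sir_step(state, beta=0.3, gamma=0.1, dt=0.1):
    s, i, r = state
    mech = np.array([-beta * s * i, beta * s * i - gamma * i, gamma * i])
    return state + dt * (mech + correction(state))

state = np.array([0.99, 0.01, 0.0])
for _ in range(300):
    state = sir_step(state)
print("S, I, R after 30 time units:", np.round(state, 3))
</code></pre>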
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18042">arXiv:2409.18042</a> <span> [<a href="https://arxiv.org/pdf/2409.18042">pdf</a>, <a href="https://arxiv.org/format/2409.18042">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Gou%2C+Y">Yunhao Gou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+R">Runhui Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhili Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+D">Daxin Tan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jing Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chunwei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yi Zhu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yihan Zeng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+K">Kuo Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dingdong Wang</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+K">Kun Xiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haoyuan Li</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+H">Haoli Bai</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jianhua Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaohui Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weike Jin</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+N">Nian Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Kwok%2C+J+T">James T. Kwok</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaodan Liang</a>, <a href="/search/cs?searchtype=author&query=Yeung%2C+D">Dit-Yan Yeung</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiao Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenguo Li</a>, et al. (6 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.18042v2-abstract-full"> GPT-4o, an omni-modal model that enables vocal conversations with diverse emotions and tones, marks a milestone for omni-modal foundation models. However, empowering Large Language Models to perceive and generate images, text, and speech end-to-end with publicly available data remains challenging for the open-source community. Existing vision-language models rely on external tools for speech processing, while speech-language models still suffer from limited or even absent vision-understanding abilities. To address this gap, we propose EMOVA (EMotionally Omni-present Voice Assistant) to enable Large Language Models with end-to-end speech capabilities while maintaining leading vision-language performance. With a semantic-acoustic disentangled speech tokenizer, we find, surprisingly, that omni-modal alignment can further enhance vision-language and speech abilities compared with the corresponding bi-modal aligned counterparts. Moreover, a lightweight style module is proposed for flexible speech style control (e.g., emotions and pitches). For the first time, EMOVA achieves state-of-the-art performance on both vision-language and speech benchmarks while supporting omni-modal spoken dialogue with vivid emotions. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://emova-ollm.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09371">arXiv:2409.09371</a> <span> [<a href="https://arxiv.org/pdf/2409.09371">pdf</a>, <a href="https://arxiv.org/format/2409.09371">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> WeatherReal: A Benchmark Based on In-Situ Observations for Evaluating Weather Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weixin Jin</a>, <a href="/search/cs?searchtype=author&query=Weyn%2C+J">Jonathan Weyn</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+P">Pengcheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+S">Siqi Xiang</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+J">Jiang Bian</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zuliang Fang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+H">Haiyu Dong</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hongyu Sun</a>, <a href="/search/cs?searchtype=author&query=Thambiratnam%2C+K">Kit Thambiratnam</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.09371v1-abstract-full"> In recent years, AI-based weather forecasting models have matched or even outperformed numerical weather prediction systems. However, most of these models have been trained and evaluated on reanalysis datasets like ERA5. These datasets, being products of numerical models, often diverge substantially from actual observations in some crucial variables like near-surface temperature, wind, precipitation and clouds - parameters that hold significant public interest. To address this divergence, we introduce WeatherReal, a novel benchmark dataset for weather forecasting, derived from global near-surface in-situ observations. WeatherReal also features a publicly accessible quality control and evaluation framework.
This paper details the sources and processing methodologies underlying the dataset, and further illustrates the advantage of in-situ observations in capturing hyper-local and extreme weather through comparative analyses and case studies. Using WeatherReal, we evaluated several data-driven models and compared them with leading numerical models. Our work aims to advance AI-based weather forecasting research towards a more application-focused and operation-ready approach. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08487">arXiv:2409.08487</a> <span> [<a href="https://arxiv.org/pdf/2409.08487">pdf</a>, <a href="https://arxiv.org/format/2409.08487">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sub-graph Based Diffusion Model for Link Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Skenderi%2C+G">Geri Skenderi</a>, <a href="/search/cs?searchtype=author&query=Shomer%2C+H">Harry Shomer</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+W">Wenzhuo Tang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+W">Wenqi Fan</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiliang Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.08487v1-abstract-full"> Denoising Diffusion Probabilistic Models (DDPMs) represent a contemporary class of generative models with exceptional qualities in both synthesis and maximizing the data likelihood.
These models work by traversing a forward Markov chain where data is perturbed, followed by a reverse process where a neural network learns to undo the perturbations and recover the original data. There have been increasing efforts to explore the applications of DDPMs in the graph domain; however, most have focused on the generative perspective. In this paper, we aim to build a novel generative model for link prediction. In particular, we treat link prediction between a pair of nodes as a conditional likelihood estimation of its enclosing sub-graph. With a dedicated design that decomposes the likelihood estimation process via the Bayesian formula, we are able to separate the estimation of sub-graph structure and its node features. This design allows our model to simultaneously enjoy the advantages of inductive learning and strong generalization capability. Remarkably, comprehensive experiments across various datasets validate that our proposed method presents numerous advantages: (1) transferability across datasets without retraining, (2) promising generalization on limited training data, and (3) robustness against graph adversarial attacks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 3 figures</span> </p>
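<p class="is-size-7 has-text-grey-dark">An illustrative aside: the forward process summarized above has a closed form, x_t = sqrt(abar_t) * x_0 + sqrt(1 - abar_t) * eps. The sketch below applies it to a toy sub-graph feature vector under a standard linear beta schedule; the reverse denoising network, which is what such a model learns, is omitted.</p>
<pre><code class="language-python">
# Closed-form DDPM forward corruption q(x_t | x_0) on a toy feature vector.
import numpy as np

rng = np.random.default_rng(0)
T = 1000
betas = np.linspace(1e-4, 0.02, T)        # standard linear schedule
alphas_bar = np.cumprod(1.0 - betas)

def q_sample(x0, t):
    """Draw x_t from q(x_t | x_0) in one shot."""
    eps = rng.standard_normal(x0.shape)
    return np.sqrt(alphas_bar[t]) * x0 + np.sqrt(1.0 - alphas_bar[t]) * eps

x0 = rng.standard_normal(16)              # toy feature vector of an enclosing sub-graph
for t in (0, 250, 999):
    xt = q_sample(x0, t)
    print(f"t={t:4d}  remaining signal fraction {np.sqrt(alphas_bar[t]):.3f}")
</code></pre>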
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05212">arXiv:2409.05212</a> <span> [<a href="https://arxiv.org/pdf/2409.05212">pdf</a>, <a href="https://arxiv.org/format/2409.05212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> SS-BRPE: Self-Supervised Blind Room Parameter Estimation Using Attention Mechanisms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chunxi Wang</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+M">Maoshen Jia</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Meiran Li</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+C">Changchun Bao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wenyu Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.05212v1-abstract-full"> In recent years, dynamic parameterization of acoustic environments has garnered attention in audio processing. This focus includes room volume and reverberation time (RT60), which define local acoustics independent of sound-source and receiver orientation. Previous studies show that purely attention-based models can achieve advanced results in room parameter estimation. However, their success relies on supervised pretraining that requires a large amount of labeled ground-truth room parameters and complex training pipelines. In light of this, we propose a novel Self-Supervised Blind Room Parameter Estimation (SS-BRPE) system. This system combines a purely attention-based model with self-supervised learning to estimate room acoustic parameters from single-channel noisy speech signals. By utilizing unlabeled audio data for pretraining, the proposed system significantly reduces dependencies on costly labeled datasets. Our model also incorporates dynamic feature augmentation during fine-tuning to enhance adaptability and generalizability. Experimental results demonstrate that the SS-BRPE system not only outperforms state-of-the-art (SOTA) methods in estimating room parameters but also effectively maintains high accuracy under conditions with limited labeled data. Code available at https://github.com/bjut-chunxiwang/SS-BRPE. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09612">arXiv:2408.09612</a> <span> [<a href="https://arxiv.org/pdf/2408.09612">pdf</a>, <a href="https://arxiv.org/format/2408.09612">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> ContactSDF: Signed Distance Functions as Multi-Contact Models for Dexterous Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wen Yang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wanxin Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.09612v1-abstract-full"> In this paper, we propose ContactSDF, a method that uses signed distance functions (SDFs) to approximate multi-contact models, including both collision detection and time-stepping routines. ContactSDF first establishes an SDF using the supporting-plane representation of an object for collision detection, and then uses the generated contact dual cones to build a second SDF for time-stepping prediction of the next state. These two SDFs create a differentiable and closed-form multi-contact dynamic model for state prediction, enabling efficient model learning and optimization for contact-rich manipulation. We perform extensive simulation experiments to show the effectiveness of ContactSDF for model learning and real-time control of dexterous manipulation. We further evaluate ContactSDF on a hardware Allegro hand for on-palm reorientation tasks. Results show that with around 2 minutes of learning on hardware, ContactSDF achieves high-quality dexterous manipulation at a frequency of 30-60 Hz. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
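<p class="is-size-7 has-text-grey-dark">An illustrative aside: a supporting-plane SDF of the kind named above is just a maximum over half-space margins. The sketch below builds one for a unit cube; the object and the max-of-planes approximation (exact only near faces) are illustrative choices, not the paper's construction.</p>
<pre><code class="language-python">
# Supporting-plane SDF for a convex object given by half-spaces a_i . x - b_i.
import numpy as np

# Unit cube centered at the origin: rows are outward normals, b the offsets.
# A point is inside when every margin a_i . x - b_i is negative.
A = np.vstack([np.eye(3), -np.eye(3)])
b = 0.5 * np.ones(6)

def supporting_plane_sdf(x):
    """Max-of-planes SDF approximation; exact near a face, convex everywhere."""
    return np.max(A @ x - b)

for p in ([0.0, 0.0, 0.0], [0.6, 0.0, 0.0], [0.5, 0.5, 0.5]):
    print(p, "sdf:", round(float(supporting_plane_sdf(np.array(p))), 3))
</code></pre>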
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09327">arXiv:2408.09327</a> <span> [<a href="https://arxiv.org/pdf/2408.09327">pdf</a>, <a href="https://arxiv.org/format/2408.09327">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Threshold Filtering Packing for Supervised Fine-Tuning: Training Related Samples within Packs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+J">Jiancheng Dong</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lei Jiang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+L">Lu Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.09327v1-abstract-full"> Packing for Supervised Fine-Tuning (SFT) in autoregressive models involves concatenating data points of varying lengths until reaching the designed maximum length to facilitate GPU processing. However, randomly concatenating data points and feeding them into an autoregressive transformer can lead to cross-contamination of sequences due to the significant difference in their subject matter. The mainstream approaches in SFT ensure that each token in the attention calculation phase only focuses on tokens within its own short sequence, without providing additional learning signals for the preceding context. To address these challenges, we introduce Threshold Filtering Packing (TFP), a method that selects samples with related context while maintaining sufficient diversity within the same pack. Our experiments show that TFP offers a simple-to-implement and scalable approach that significantly enhances SFT performance, with observed improvements of up to 7% on GSM8K, 4% on HumanEval, and 15% on the adult-census-income dataset. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 4 figures</span> </p>
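<p class="is-size-7 has-text-grey-dark">An illustrative aside: threshold filtering packing can be pictured as greedy packing that admits a sample only when its similarity to the pack sits in a band, related enough to help but not so close as to kill diversity. The similarity measure, thresholds, and budget in the sketch below are invented; the paper's selection rule may differ.</p>
<pre><code class="language-python">
# Greedy pack construction with a similarity band and a token budget.
import numpy as np

rng = np.random.default_rng(0)
samples = rng.standard_normal((50, 8))        # toy embeddings of SFT samples
lengths = rng.integers(100, 800, size=50)     # token lengths
MAX_LEN, LO, HI = 2048, 0.1, 0.9              # pack budget and similarity band

def cosine(u, v):
    return float(u @ v / (np.linalg.norm(u) * np.linalg.norm(v)))

remaining = list(range(50))
packs = []
while remaining:
    pack = [remaining.pop(0)]
    used = lengths[pack[0]]
    for idx in list(remaining):
        if used + lengths[idx] > MAX_LEN:
            continue
        sim = cosine(samples[idx], samples[pack[-1]])
        if LO <= sim <= HI:                   # related, but not a near-duplicate
            pack.append(idx)
            used += lengths[idx]
            remaining.remove(idx)
    packs.append(pack)

print(f"built {len(packs)} packs; first pack sizes: {[len(p) for p in packs[:5]]}")
</code></pre>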
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07855v3-abstract-full').style.display = 'none'; document.getElementById('2408.07855v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Video demo: https://youtu.be/NsL4hbSXvFg</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19902">arXiv:2407.19902</a> <span> [<a href="https://arxiv.org/pdf/2407.19902">pdf</a>, <a href="https://arxiv.org/format/2407.19902">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> A Differential Dynamic Programming Framework for Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+K">Kun Cao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xinhang Xu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wanxin Jin</a>, <a href="/search/cs?searchtype=author&query=Johansson%2C+K+H">Karl H. Johansson</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+L">Lihua Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19902v1-abstract-short" style="display: inline;"> A differential dynamic programming (DDP)-based framework for inverse reinforcement learning (IRL) is introduced to recover the parameters in the cost function, system dynamics, and constraints from demonstrations. Different from existing work, where DDP was used for the inner forward problem with inequality constraints, our proposed framework uses it for efficient computation of the gradient requi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19902v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19902v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19902v1-abstract-full" style="display: none;"> A differential dynamic programming (DDP)-based framework for inverse reinforcement learning (IRL) is introduced to recover the parameters in the cost function, system dynamics, and constraints from demonstrations. Different from existing work, where DDP was used for the inner forward problem with inequality constraints, our proposed framework uses it for efficient computation of the gradient required in the outer inverse problem with equality and inequality constraints. The equivalence between the proposed method and existing methods based on Pontryagin's Maximum Principle (PMP) is established. 
More importantly, building on this DDP-based IRL with an open-loop loss function, a closed-loop IRL framework is presented. In this framework, a loss function is proposed to capture the closed-loop nature of demonstrations. It is shown to be better than the commonly used open-loop loss function. We show that the closed-loop IRL framework reduces to a constrained inverse optimal control problem under certain assumptions. Under these assumptions and a rank condition, it is proven that the learning parameters can be recovered from the demonstration data. The proposed framework is extensively evaluated through four numerical robot examples and one real-world quadrotor system. The experiments validate the theoretical results and illustrate the practical relevance of the approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19902v1-abstract-full').style.display = 'none'; document.getElementById('2407.19902v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 15 figures; submitted to IEEE for potential publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12068">arXiv:2407.12068</a> <span> [<a href="https://arxiv.org/pdf/2407.12068">pdf</a>, <a href="https://arxiv.org/format/2407.12068">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning on Graphs with Large Language Models (LLMs): A Deep Dive into Model Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+K">Kai Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zewen Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhikai Chen</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+H">Hongzhi Wen</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiliang Tang</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+Y">Yi Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12068v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated remarkable performance across various natural language processing tasks. Recently, several LLMs-based pipelines have been developed to enhance learning on graphs with text attributes, showcasing promising performance.
However, graphs are well-known to be susceptible to adversarial attacks, and it remains unclear whether LLMs exhibit robustness in learn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12068v2-abstract-full').style.display = 'inline'; document.getElementById('2407.12068v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12068v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated remarkable performance across various natural language processing tasks. Recently, several LLMs-based pipelines have been developed to enhance learning on graphs with text attributes, showcasing promising performance. However, graphs are well-known to be susceptible to adversarial attacks, and it remains unclear whether LLMs exhibit robustness in learning on graphs. To address this gap, our work aims to explore the potential of LLMs in the context of adversarial attacks on graphs. Specifically, we investigate the robustness against graph structural and textual perturbations in terms of two dimensions: LLMs-as-Enhancers and LLMs-as-Predictors. Through extensive experiments, we find that, compared to shallow models, both LLMs-as-Enhancers and LLMs-as-Predictors offer superior robustness against structural and textual attacks. Based on these findings, we carried out additional analyses to investigate the underlying causes. Furthermore, we have made our benchmark library openly available to facilitate quick and fair evaluations, and to encourage ongoing innovative research in this field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12068v2-abstract-full').style.display = 'none'; document.getElementById('2407.12068v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04216">arXiv:2407.04216</a> <span> [<a href="https://arxiv.org/pdf/2407.04216">pdf</a>, <a href="https://arxiv.org/format/2407.04216">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Safe MPC Alignment with Human Directional Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhixian Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenlong Zhang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yi Ren</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhaoran Wang</a>, <a href="/search/cs?searchtype=author&query=Pappas%2C+G+J">George J.
Pappas</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wanxin Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04216v2-abstract-short" style="display: inline;"> In safety-critical robot planning or control, manually specifying safety constraints or learning them from demonstrations can be challenging. In this article, we propose a certifiable alignment method for a robot to learn a safety constraint in its model predictive control (MPC) policy with human online directional feedback. To our knowledge, it is the first method to learn safety constraints from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04216v2-abstract-full').style.display = 'inline'; document.getElementById('2407.04216v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04216v2-abstract-full" style="display: none;"> In safety-critical robot planning or control, manually specifying safety constraints or learning them from demonstrations can be challenging. In this article, we propose a certifiable alignment method for a robot to learn a safety constraint in its model predictive control (MPC) policy with human online directional feedback. To our knowledge, it is the first method to learn safety constraints from human feedback. The proposed method is based on an empirical observation: human directional feedback, when available, tends to guide the robot toward safer regions. The method only requires the direction of human feedback to update the learning hypothesis space. It is certifiable, providing an upper bound on the total number of human feedback inputs in the case of successful learning, or declaring hypothesis misspecification, i.e., the true implicit safety constraint cannot be found within the specified hypothesis space. We evaluated the proposed method using numerical examples and user studies in two simulation games. Additionally, we implemented and tested the proposed method on a real-world Franka robot arm performing mobile water-pouring tasks. The results demonstrate the efficacy and efficiency of our method, showing that it enables a robot to successfully learn safety constraints with only a handful (tens) of human directional corrections. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04216v2-abstract-full').style.display = 'none'; document.getElementById('2407.04216v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
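<p class="is-size-7">The certifiable bound on the number of corrections is reminiscent of a cutting-plane argument: each directional correction defines a halfspace the true parameter must lie in, halving the hypothesis set. The Monte-Carlo sketch below, with a simulated human and invented names throughout, illustrates that intuition only; it is not the paper's update rule.</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(0)
theta_true = np.array([0.6, -0.3])             # the unknown safety parameter
cuts = []                                       # halfspaces a @ theta >= b

def center(cuts, n=50000, dim=2):
    """Monte-Carlo centroid of the remaining hypothesis set in the unit ball."""
    x = rng.normal(size=(n, dim))
    x = x / np.linalg.norm(x, axis=1, keepdims=True) * rng.random((n, 1)) ** (1 / dim)
    keep = np.ones(n, dtype=bool)
    for a, b in cuts:
        keep &= x @ a >= b
    if not keep.any():                          # analogue of declaring misspecification
        raise RuntimeError("hypothesis space exhausted")
    return x[keep].mean(axis=0)

theta = center(cuts)
for t in range(20):
    d = theta_true - theta                      # only the direction is observed
    if np.linalg.norm(d) < 0.1:
        break
    a = d / np.linalg.norm(d)
    cuts.append((a, float(a @ theta)))          # cut off the half behind the estimate
    theta = center(cuts)
print(t, np.round(theta, 3))                    # a handful of cuts localizes theta_true
</code></pre>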
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, submission to T-RO</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19632">arXiv:2406.19632</a> <span> [<a href="https://arxiv.org/pdf/2406.19632">pdf</a>, <a href="https://arxiv.org/format/2406.19632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PPTFormer: Pseudo Multi-Perspective Transformer for UAV Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+D">Deyi Ji</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wenwei Jin</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Hongtao Lu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Feng Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19632v2-abstract-short" style="display: inline;"> The ascension of Unmanned Aerial Vehicles (UAVs) in various fields necessitates effective UAV image segmentation, which faces challenges due to the dynamic perspectives of UAV-captured images. Traditional segmentation algorithms falter as they cannot accurately mimic the complexity of UAV perspectives, and the cost of obtaining multi-perspective labeled datasets is prohibitive. To address these is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19632v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19632v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19632v2-abstract-full" style="display: none;"> The ascension of Unmanned Aerial Vehicles (UAVs) in various fields necessitates effective UAV image segmentation, which faces challenges due to the dynamic perspectives of UAV-captured images. Traditional segmentation algorithms falter as they cannot accurately mimic the complexity of UAV perspectives, and the cost of obtaining multi-perspective labeled datasets is prohibitive. To address these issues, we introduce the PPTFormer, a novel Pseudo Multi-Perspective Transformer network that revolutionizes UAV image segmentation. Our approach circumvents the need for actual multi-perspective data by creating pseudo perspectives for enhanced multi-perspective learning. The PPTFormer network boasts Perspective Representation, novel Perspective Prototypes, and a specialized encoder and decoder that together achieve superior segmentation results through Pseudo Multi-Perspective Attention (PMP Attention) and fusion. Our experiments demonstrate that PPTFormer achieves state-of-the-art performance across five UAV segmentation datasets, confirming its capability to effectively simulate UAV flight perspectives and significantly advance segmentation precision. This work presents a pioneering leap in UAV scene understanding and sets a new benchmark for future developments in semantic segmentation.
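<p class="is-size-7">One way to read "pseudo perspectives" is synthesizing new viewpoints from a single image via random homographies. The 4-point solver below is a generic illustration of that idea, with random_homography and jitter as assumed names; it does not reproduce PPTFormer's prototypes or attention.</p> <pre><code class="language-python">
import numpy as np

def random_homography(h, w, jitter=0.15, rng=np.random.default_rng(0)):
    """Map the image corners to randomly perturbed corners (one pseudo view)."""
    src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    dst = src + rng.uniform(-jitter, jitter, (4, 2)) * [w, h]
    # solve the 8-DOF homography src -> dst from the four correspondences
    A, b = [], []
    for (x, y), (u, v) in zip(src, dst):
        A += [[x, y, 1, 0, 0, 0, -u * x, -u * y],
              [0, 0, 0, x, y, 1, -v * x, -v * y]]
        b += [u, v]
    p = np.linalg.lstsq(np.array(A), np.array(b), rcond=None)[0]
    return np.append(p, 1.0).reshape(3, 3)

H = random_homography(512, 512)
print(H.round(3))  # warp the single view with H to obtain one pseudo perspective
</code></pre>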
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19632v2-abstract-full').style.display = 'none'; document.getElementById('2406.19632v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IJCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18379">arXiv:2406.18379</a> <span> [<a href="https://arxiv.org/pdf/2406.18379">pdf</a>, <a href="https://arxiv.org/format/2406.18379">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> MALSIGHT: Exploring Malicious Source Code and Benign Pseudocode for Iterative Binary Malware Summarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+H">Haolang Lu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Hongrui Peng</a>, <a href="/search/cs?searchtype=author&query=Nan%2C+G">Guoshun Nan</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+J">Jiaoyang Cui</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cheng Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weifei Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Songtao Wang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+S">Shengli Pan</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+X">Xiaofeng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18379v2-abstract-short" style="display: inline;"> Binary malware summarization aims to automatically generate human-readable descriptions of malware behaviors from executable files, facilitating tasks like malware cracking and detection. Previous methods based on Large Language Models (LLMs) have shown great promise. However, they still face significant issues, including poor usability, inaccurate explanations, and incomplete summaries, primarily… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18379v2-abstract-full').style.display = 'inline'; document.getElementById('2406.18379v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18379v2-abstract-full" style="display: none;"> Binary malware summarization aims to automatically generate human-readable descriptions of malware behaviors from executable files, facilitating tasks like malware cracking and detection. Previous methods based on Large Language Models (LLMs) have shown great promise.
However, they still face significant issues, including poor usability, inaccurate explanations, and incomplete summaries, primarily due to the obscure pseudocode structure and the lack of malware training summaries. Further, calling relationships between functions, which involve the rich interactions within a malware binary, remain largely underexplored. To this end, we propose MALSIGHT, a novel code summarization framework that can iteratively generate descriptions of binary malware by exploring malicious source code and benign pseudocode. Specifically, we construct the first malware summary dataset, MalS and MalP, using an LLM and manually refine this dataset with human effort. At the training stage, we tune our proposed MalT5, a novel LLM-based code model, on the MalS and benign pseudocode datasets. Then, at the test stage, we iteratively feed the pseudocode functions into MalT5 to obtain the summary. Such a procedure facilitates the understanding of pseudocode structure and captures the intricate interactions between functions, thereby benefiting summaries' usability, accuracy, and completeness. Additionally, we propose a novel evaluation benchmark, BLEURT-sum, to measure the quality of summaries. Experiments on three datasets show the effectiveness of the proposed MALSIGHT. Notably, our proposed MalT5, with only 0.77B parameters, delivers comparable performance to the much larger Code-Llama. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18379v2-abstract-full').style.display = 'none'; document.getElementById('2406.18379v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16715">arXiv:2406.16715</a> <span> [<a href="https://arxiv.org/pdf/2406.16715">pdf</a>, <a href="https://arxiv.org/format/2406.16715">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GC4NC: A Benchmark Framework for Graph Condensation on Node Classification with New Insights </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gong%2C+S">Shengbo Gong</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+J">Juntong Ni</a>, <a href="/search/cs?searchtype=author&query=Sachdeva%2C+N">Noveen Sachdeva</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Carl Yang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16715v2-abstract-short" style="display: inline;"> Graph condensation (GC) is an emerging technique designed to learn a significantly smaller graph that retains the essential information of the original graph.
This condensed graph has shown promise in accelerating graph neural networks while preserving performance comparable to those achieved with the original, larger graphs. Additionally, this technique facilitates downstream applications like ne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16715v2-abstract-full').style.display = 'inline'; document.getElementById('2406.16715v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16715v2-abstract-full" style="display: none;"> Graph condensation (GC) is an emerging technique designed to learn a significantly smaller graph that retains the essential information of the original graph. This condensed graph has shown promise in accelerating graph neural networks while preserving performance comparable to those achieved with the original, larger graphs. Additionally, this technique facilitates downstream applications like neural architecture search and deepens our understanding of redundancies in large graphs. Despite the rapid development of GC methods, particularly for node classification, a unified evaluation framework is still lacking to systematically compare different GC methods or clarify key design choices for improving their effectiveness. To bridge these gaps, we introduce GC4NC, a comprehensive framework for evaluating diverse GC methods on node classification across multiple dimensions including performance, efficiency, privacy preservation, denoising ability, NAS effectiveness, and transferability. Our systematic evaluation offers novel insights into how condensed graphs behave and the critical design choices that drive their success. These findings pave the way for future advancements in GC methods, both enhancing performance and expanding their real-world applications. Our code is available at https://github.com/Emory-Melody/GraphSlim/tree/main/benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16715v2-abstract-full').style.display = 'none'; document.getElementById('2406.16715v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
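<p class="is-size-7">The protocol such a benchmark has to standardize reduces to: condense, train only on the condensed graph, then test on the original. The toy sketch below uses a random subgraph as a stand-in condenser and synthetic tensors; none of it is GC4NC's actual API.</p> <pre><code class="language-python">
import torch, torch.nn as nn, torch.nn.functional as F

class GCN(nn.Module):
    def __init__(self, d_in, d_out, d_hid=32):
        super().__init__()
        self.l1, self.l2 = nn.Linear(d_in, d_hid), nn.Linear(d_hid, d_out)
    def forward(self, x, adj):                      # adj: dense normalized (n, n)
        return adj @ self.l2(F.relu(adj @ self.l1(x)))

def norm_adj(a):                                    # symmetric normalization + self-loops
    a = a + torch.eye(len(a))
    d = a.sum(1).rsqrt()
    return d[:, None] * a * d[None, :]

torch.manual_seed(0)
n, d, c, k = 200, 16, 4, 20                         # original nodes, features, classes, condensed nodes
x, y = torch.randn(n, d), torch.randint(0, c, (n,))
a = (torch.rand(n, n) < 0.05).float()
idx = torch.randperm(n)[:k]                         # stand-in "condenser": a random subgraph
xs, ys, adjs = x[idx], y[idx], norm_adj(a[idx][:, idx])

model, adj = GCN(d, c), norm_adj(a)
opt = torch.optim.Adam(model.parameters(), lr=0.01)
for _ in range(200):                                # train on the condensed graph only
    opt.zero_grad()
    F.cross_entropy(model(xs, adjs), ys).backward()
    opt.step()
with torch.no_grad():                               # test on the full original graph
    acc = (model(x, adj).argmax(1) == y).float().mean()
print(f"accuracy on the original graph: {acc:.3f}")
</code></pre>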
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16042">arXiv:2406.16042</a> <span> [<a href="https://arxiv.org/pdf/2406.16042">pdf</a>, <a href="https://arxiv.org/format/2406.16042">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pose-dIVE: Pose-Diversified Augmentation with Diffusion Model for Person Re-Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+I+H">Inès Hyeonsu Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">JoungBin Lee</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Woojeong Jin</a>, <a href="/search/cs?searchtype=author&query=Son%2C+S">Soowon Son</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+K">Kyusun Cho</a>, <a href="/search/cs?searchtype=author&query=Seo%2C+J">Junyoung Seo</a>, <a href="/search/cs?searchtype=author&query=Kwak%2C+M">Min-Seop Kwak</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+S">Seokju Cho</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+J">JeongYeol Baek</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Byeongwon Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Seungryong Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16042v2-abstract-short" style="display: inline;"> Person re-identification (Re-ID) often faces challenges due to variations in human poses and camera viewpoints, which significantly affect the appearance of individuals across images. Existing datasets frequently lack diversity and scalability in these aspects, hindering the generalization of Re-ID models to new camera systems. We propose Pose-dIVE, a novel data augmentation approach that incorpor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16042v2-abstract-full').style.display = 'inline'; document.getElementById('2406.16042v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16042v2-abstract-full" style="display: none;"> Person re-identification (Re-ID) often faces challenges due to variations in human poses and camera viewpoints, which significantly affect the appearance of individuals across images. Existing datasets frequently lack diversity and scalability in these aspects, hindering the generalization of Re-ID models to new camera systems. We propose Pose-dIVE, a novel data augmentation approach that incorporates sparse and underrepresented human pose and camera viewpoint examples into the training data, addressing the limited diversity in the original training data distribution. Our objective is to augment the training dataset to enable existing Re-ID models to learn features unbiased by human pose and camera viewpoint variations. To achieve this, we leverage the knowledge of pre-trained large-scale diffusion models.
By conditioning the diffusion model on both the human pose and camera viewpoint concurrently through the SMPL model, we generate training data with diverse human poses and camera viewpoints. Experimental results demonstrate the effectiveness of our method in addressing human pose bias and enhancing the generalizability of Re-ID models compared to other data augmentation-based Re-ID approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16042v2-abstract-full').style.display = 'none'; document.getElementById('2406.16042v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13873">arXiv:2406.13873</a> <span> [<a href="https://arxiv.org/pdf/2406.13873">pdf</a>, <a href="https://arxiv.org/format/2406.13873">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Pure Transformer Pretraining Framework on Text-attributed Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yu Song</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+H">Haitao Mao</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+J">Jiachen Xiao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingzhe Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhikai Chen</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Carl Yang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiliang Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13873v1-abstract-short" style="display: inline;"> Pretraining plays a pivotal role in acquiring generalized knowledge from large-scale data, achieving remarkable successes as evidenced by large models in CV and NLP. However, progress in the graph domain remains limited due to fundamental challenges such as feature heterogeneity and structural heterogeneity. Recently, increasing efforts have been made to enhance node feature quality with Large Lan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13873v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13873v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13873v1-abstract-full" style="display: none;"> Pretraining plays a pivotal role in acquiring generalized knowledge from large-scale data, achieving remarkable successes as evidenced by large models in CV and NLP. However, progress in the graph domain remains limited due to fundamental challenges such as feature heterogeneity and structural heterogeneity. 
Recently, increasing efforts have been made to enhance node feature quality with Large Language Models (LLMs) on text-attributed graphs (TAGs), demonstrating superiority to traditional bag-of-words or word2vec techniques. These high-quality node features reduce the previously critical role of graph structure, resulting in a modest performance gap between Graph Neural Networks (GNNs) and structure-agnostic Multi-Layer Perceptrons (MLPs). Motivated by this, we introduce a feature-centric pretraining perspective by treating graph structure as a prior and leveraging the rich, unified feature space to learn refined interaction patterns that generalize across graphs. Our framework, Graph Sequence Pretraining with Transformer (GSPT), samples node contexts through random walks and employs masked feature reconstruction to capture pairwise proximity in the LLM-unified feature space using a standard Transformer. By utilizing unified text representations rather than varying structures, our framework achieves significantly better transferability among graphs within the same domain. GSPT can be easily adapted to both node classification and link prediction, demonstrating promising empirical success on various datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13873v1-abstract-full').style.display = 'none'; document.getElementById('2406.13873v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10727">arXiv:2406.10727</a> <span> [<a href="https://arxiv.org/pdf/2406.10727">pdf</a>, <a href="https://arxiv.org/format/2406.10727">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Text-space Graph Foundation Models: Comprehensive Benchmarks and New Insights </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhikai Chen</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+H">Haitao Mao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingzhe Liu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yu Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bingheng Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Fatemi%2C+B">Bahare Fatemi</a>, <a href="/search/cs?searchtype=author&query=Tsitsulin%2C+A">Anton Tsitsulin</a>, <a href="/search/cs?searchtype=author&query=Perozzi%2C+B">Bryan Perozzi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hui Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiliang Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10727v1-abstract-short" style="display: inline;"> Given the ubiquity of graph data and its applications in diverse domains, building a Graph Foundation Model (GFM) that can work well across different graphs and tasks with a unified backbone has recently garnered significant interest.
A major obstacle to achieving this goal stems from the fact that graphs from different domains often exhibit diverse node features. Inspired by multi-modal models t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10727v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10727v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10727v1-abstract-full" style="display: none;"> Given the ubiquity of graph data and its applications in diverse domains, building a Graph Foundation Model (GFM) that can work well across different graphs and tasks with a unified backbone has recently garnered significant interest. A major obstacle to achieving this goal stems from the fact that graphs from different domains often exhibit diverse node features. Inspired by multi-modal models that align different modalities with natural language, the text has recently been adopted to provide a unified feature space for diverse graphs. Despite the great potential of these text-space GFMs, current research in this field is hampered by two problems. First, the absence of a comprehensive benchmark with unified problem settings hinders a clear understanding of the comparative effectiveness and practical value of different text-space GFMs. Second, there is a lack of sufficient datasets to thoroughly explore the methods' full potential and verify their effectiveness across diverse settings. To address these issues, we conduct a comprehensive benchmark providing novel text-space datasets and comprehensive evaluation under unified problem settings. Empirical results provide new insights and inspire future research directions. Our code and data are publicly available from https://github.com/CurryTang/TSGFM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10727v1-abstract-full').style.display = 'none'; document.getElementById('2406.10727v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
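<p class="is-size-7">The "text space" idea can be sketched in a few lines: nodes from unrelated graphs are encoded into one shared feature space via their text, so a single backbone can consume both. The hashed bag-of-words encoder below is a cheap, deterministic stand-in for the LLM embedders such benchmarks actually use; it is not the paper's code.</p> <pre><code class="language-python">
import hashlib
import numpy as np

def text_embed(text, dim=256):
    """Hashed bag-of-words: a toy stand-in for an LLM text encoder."""
    v = np.zeros(dim)
    for tok in text.lower().split():
        h = int(hashlib.md5(tok.encode()).hexdigest(), 16)
        v[h % dim] += 1.0 if (h >> 8) % 2 else -1.0   # signed feature hashing
    n = np.linalg.norm(v)
    return v / n if n else v

# Nodes from two different "graphs" land in the same 256-d space,
# so one model can be trained across both datasets.
paper = text_embed("graph neural networks for citation classification")
product = text_embed("wireless noise cancelling headphones")
print(paper.shape, product.shape, float(paper @ product))
</code></pre>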
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preliminary version: if you find any mistakes regarding the evaluation, feel free to contact the first author</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10475">arXiv:2406.10475</a> <span> [<a href="https://arxiv.org/pdf/2406.10475">pdf</a>, <a href="https://arxiv.org/format/2406.10475">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Discrete Latent Perspective Learning for Segmentation and Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+D">Deyi Ji</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Feng Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lanyun Zhu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wenwei Jin</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Hongtao Lu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jieping Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10475v1-abstract-short" style="display: inline;"> In this paper, we address the challenge of Perspective-Invariant Learning in machine learning and computer vision, which involves enabling a network to understand images from varying perspectives to achieve consistent semantic interpretation. While standard approaches rely on the labor-intensive collection of multi-view images or limited data augmentation techniques, we propose a novel framework,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10475v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10475v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10475v1-abstract-full" style="display: none;"> In this paper, we address the challenge of Perspective-Invariant Learning in machine learning and computer vision, which involves enabling a network to understand images from varying perspectives to achieve consistent semantic interpretation. While standard approaches rely on the labor-intensive collection of multi-view images or limited data augmentation techniques, we propose a novel framework, Discrete Latent Perspective Learning (DLPL), for latent multi-perspective fusion learning using conventional single-view images. DLPL comprises three main modules: Perspective Discrete Decomposition (PDD), Perspective Homography Transformation (PHT), and Perspective Invariant Attention (PIA), which work together to discretize visual features, transform perspectives, and fuse multi-perspective semantic information, respectively. DLPL is a universal perspective learning framework applicable to a variety of scenarios and vision tasks. Extensive experiments demonstrate that DLPL significantly enhances the network's capacity to depict images across diverse scenarios (daily photos, UAV, auto-driving) and tasks (detection, segmentation). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10475v1-abstract-full').style.display = 'none'; document.getElementById('2406.10475v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024 Spotlight</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09681">arXiv:2406.09681</a> <span> [<a href="https://arxiv.org/pdf/2406.09681">pdf</a>, <a href="https://arxiv.org/format/2406.09681">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Asymmetrical Siamese Network for Point Clouds Normal Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nannan Li</a>, <a href="/search/cs?searchtype=author&query=Madeline%2C+H">Haba Madeline</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiuping Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09681v2-abstract-short" style="display: inline;"> In recent years, deep learning-based point cloud normal estimation has made great progress. However, existing methods mainly rely on the PCPNet dataset, leading to overfitting. In addition, the correlation between point clouds with different noise scales remains unexplored, resulting in poor performance in cross-domain scenarios. In this paper, we explore the consistency of intrinsic features lear… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09681v2-abstract-full').style.display = 'inline'; document.getElementById('2406.09681v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09681v2-abstract-full" style="display: none;"> In recent years, deep learning-based point cloud normal estimation has made great progress. However, existing methods mainly rely on the PCPNet dataset, leading to overfitting. In addition, the correlation between point clouds with different noise scales remains unexplored, resulting in poor performance in cross-domain scenarios. In this paper, we explore the consistency of intrinsic features learned from clean and noisy point clouds using an Asymmetric Siamese Network architecture. By applying reasonable constraints between features extracted from different branches, we enhance the quality of normal estimation. Moreover, we introduce a novel multi-view normal estimation dataset that includes a larger variety of shapes with different noise levels. Evaluation of existing methods on this new dataset reveals their inability to adapt to different types of shapes, indicating a degree of overfitting. 
Extensive experiments show that the proposed dataset poses significant challenges for point cloud normal estimation and that our feature constraint mechanism effectively improves upon existing methods and reduces overfitting in current architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09681v2-abstract-full').style.display = 'none'; document.getElementById('2406.09681v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06016">arXiv:2406.06016</a> <span> [<a href="https://arxiv.org/pdf/2406.06016">pdf</a>, <a href="https://arxiv.org/format/2406.06016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> EpiLearn: A Python Library for Machine Learning in Epidemic Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zewen Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunxiao Li</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+M">Mingyang Wei</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+G">Guancheng Wan</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+M+S+Y">Max S. Y. Lau</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.06016v2-abstract-short" style="display: inline;"> EpiLearn is a Python toolkit developed for modeling, simulating, and analyzing epidemic data. Although there exist several packages that also deal with epidemic modeling, they are often restricted to mechanistic models or traditional statistical tools. As machine learning continues to shape the world, the gap between these packages and the latest models has become larger. To bridge the gap and ins… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06016v2-abstract-full').style.display = 'inline'; document.getElementById('2406.06016v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.06016v2-abstract-full" style="display: none;"> EpiLearn is a Python toolkit developed for modeling, simulating, and analyzing epidemic data. Although there exist several packages that also deal with epidemic modeling, they are often restricted to mechanistic models or traditional statistical tools. As machine learning continues to shape the world, the gap between these packages and the latest models has become larger. To bridge the gap and inspire innovative research in epidemic modeling, EpiLearn not only provides support for evaluating epidemic models based on machine learning, but also incorporates comprehensive tools for analyzing epidemic data, such as simulation, visualization, transformations, etc. 
For the convenience of both epidemiologists and data scientists, we provide a unified framework for training and evaluation of epidemic models on two tasks: Forecasting and Source Detection. To facilitate the development of new models, EpiLearn follows a modular design, making it flexible and easy to use. In addition, an interactive web application is also developed to visualize real-world or simulated epidemic data. Our package is available at https://github.com/Emory-Melody/EpiLearn. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06016v2-abstract-full').style.display = 'none'; document.getElementById('2406.06016v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18768">arXiv:2405.18768</a> <span> [<a href="https://arxiv.org/pdf/2405.18768">pdf</a>, <a href="https://arxiv.org/format/2405.18768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RNAFlow: RNA Structure & Sequence Design via Inverse Folding-Based Flow Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nori%2C+D">Divya Nori</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wengong Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18768v2-abstract-short" style="display: inline;"> The growing significance of RNA engineering in diverse biological applications has spurred interest in developing AI methods for structure-based RNA design. While diffusion models have excelled in protein design, adapting them for RNA presents new challenges due to RNA's conformational flexibility and the computational cost of fine-tuning large structure prediction models. To this end, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18768v2-abstract-full').style.display = 'inline'; document.getElementById('2405.18768v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18768v2-abstract-full" style="display: none;"> The growing significance of RNA engineering in diverse biological applications has spurred interest in developing AI methods for structure-based RNA design. While diffusion models have excelled in protein design, adapting them for RNA presents new challenges due to RNA's conformational flexibility and the computational cost of fine-tuning large structure prediction models. To this end, we propose RNAFlow, a flow matching model for protein-conditioned RNA sequence-structure design. Its denoising network integrates an RNA inverse folding model and a pre-trained RosettaFold2NA network for generation of RNA sequences and structures.
The integration of inverse folding in the structure denoising process allows us to simplify training by fixing the structure prediction network. We further enhance the inverse folding model by conditioning it on inferred conformational ensembles to model dynamic RNA conformations. Evaluation on protein-conditioned RNA structure and sequence generation tasks demonstrates RNAFlow's advantage over existing RNA design methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18768v2-abstract-full').style.display = 'none'; document.getElementById('2405.18768v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16113">arXiv:2405.16113</a> <span> [<a href="https://arxiv.org/pdf/2405.16113">pdf</a>, <a href="https://arxiv.org/format/2405.16113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enabling On-Device Learning via Experience Replay with Efficient Dataset Condensation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+G">Gelei Xu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+N">Ningzhi Tang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+J">Jun Xia</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yiyu Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16113v1-abstract-short" style="display: inline;"> Upon deployment to edge devices, it is often desirable for a model to further learn from streaming data to improve accuracy. However, extracting representative features from such data is challenging because it is typically unlabeled, non-independent and identically distributed (non-i.i.d), and is seen only once. To mitigate this issue, a common strategy is to maintain a small data buffer on the ed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16113v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16113v1-abstract-full" style="display: none;"> Upon deployment to edge devices, it is often desirable for a model to further learn from streaming data to improve accuracy. However, extracting representative features from such data is challenging because it is typically unlabeled, non-independent and identically distributed (non-i.i.d), and is seen only once. 
To mitigate this issue, a common strategy is to maintain a small data buffer on the edge device to hold the most representative data for further learning. As most data is either never stored or quickly discarded, identifying the most representative data to avoid significant information loss becomes critical. In this paper, we propose an on-device framework that addresses this issue by condensing incoming data into more informative samples. Specifically, to effectively handle unlabeled incoming data, we propose a pseudo-labeling technique designed for unlabeled on-device learning environments. Additionally, we develop a dataset condensation technique that requires only modest computational resources. To counteract the effects of noisy labels during the condensation process, we further utilize a contrastive learning objective to improve the purity of class data within the buffer. Our empirical results indicate substantial improvements over existing methods, particularly when buffer capacity is severely restricted. For instance, with a buffer capacity of just one sample per class, our method achieves an accuracy that outperforms the best existing baseline by 58.4% on the CIFAR-10 dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16113v1-abstract-full').style.display = 'none'; document.getElementById('2405.16113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09470">arXiv:2405.09470</a> <span> [<a href="https://arxiv.org/pdf/2405.09470">pdf</a>, <a href="https://arxiv.org/format/2405.09470">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Towards Evaluating the Robustness of Automatic Speech Recognition Systems via Audio Style Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+W">Weifei Jin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuxin Cao</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Junjie Su</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Q">Qi Shen</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+K">Kai Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Derui Wang</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jie Hao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09470v1-abstract-short"
style="display: inline;"> In light of the widespread application of Automatic Speech Recognition (ASR) systems, their security concerns have received much more attention than ever before, primarily due to the susceptibility of Deep Neural Networks. Previous studies have illustrated that surreptitiously crafting adversarial perturbations enables the manipulation of speech recognition systems, resulting in the production of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09470v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09470v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09470v1-abstract-full" style="display: none;"> In light of the widespread application of Automatic Speech Recognition (ASR) systems, their security concerns have received much more attention than ever before, primarily due to the susceptibility of Deep Neural Networks. Previous studies have illustrated that surreptitiously crafting adversarial perturbations enables the manipulation of speech recognition systems, resulting in the production of malicious commands. These attack methods mostly require adding noise perturbations under $\ell_p$ norm constraints, inevitably leaving behind artifacts of manual modifications. Recent research has alleviated this limitation by manipulating style vectors to synthesize adversarial examples based on Text-to-Speech (TTS) synthesis audio. However, style modifications based on optimization objectives significantly reduce the controllability and editability of audio styles. In this paper, we propose an attack on ASR systems based on user-customized style transfer. We first test the effect of Style Transfer Attack (STA) which combines style transfer and adversarial attack in sequential order. And then, as an improvement, we propose an iterative Style Code Attack (SCA) to maintain audio quality. Experimental results show that our method can meet the need for user-customized styles and achieve a success rate of 82% in attacks, while keeping sound naturalness due to our user study. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09470v1-abstract-full').style.display = 'none'; document.getElementById('2405.09470v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to SecTL (AsiaCCS Workshop) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08205">arXiv:2405.08205</a> <span> [<a href="https://arxiv.org/pdf/2405.08205">pdf</a>, <a href="https://arxiv.org/format/2405.08205">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generative Enzyme Design Guided by Functionally Important Sites and Small-Molecule Substrates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhenqiao Song</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yunlong Zhao</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wenxian Shi</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wengong Jin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08205v3-abstract-short" style="display: inline;"> Enzymes are genetically encoded biocatalysts capable of accelerating chemical reactions. How can we automatically design functional enzymes? In this paper, we propose EnzyGen, an approach to learn a unified model to design enzymes across all functional families. Our key idea is to generate an enzyme's amino acid sequence and their three-dimensional (3D) coordinates based on functionally important… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08205v3-abstract-full').style.display = 'inline'; document.getElementById('2405.08205v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08205v3-abstract-full" style="display: none;"> Enzymes are genetically encoded biocatalysts capable of accelerating chemical reactions. How can we automatically design functional enzymes? In this paper, we propose EnzyGen, an approach to learn a unified model to design enzymes across all functional families. Our key idea is to generate an enzyme's amino acid sequence and their three-dimensional (3D) coordinates based on functionally important sites and substrates corresponding to a desired catalytic function. These sites are automatically mined from enzyme databases. EnzyGen consists of a novel interleaving network of attention and neighborhood equivariant layers, which captures both long-range correlation in an entire protein sequence and local influence from nearest amino acids in 3D space. To learn the generative model, we devise a joint training objective, including a sequence generation loss, a position prediction loss and an enzyme-substrate interaction loss. We further construct EnzyBench, a dataset with 3157 enzyme families, covering all available enzymes within the protein data bank (PDB). Experimental results show that our EnzyGen consistently achieves the best performance across all 323 testing families, surpassing the best baseline by 10.79% in terms of substrate binding affinity. 
arXiv:2405.08205 [pdf, other] cs.LG (https://arxiv.org/abs/2405.08205)
Generative Enzyme Design Guided by Functionally Important Sites and Small-Molecule Substrates
Authors: Zhenqiao Song, Yunlong Zhao, Wenxian Shi, Wengong Jin, Yang Yang, Lei Li
Abstract: Enzymes are genetically encoded biocatalysts capable of accelerating chemical reactions. How can we automatically design functional enzymes? In this paper, we propose EnzyGen, an approach that learns a unified model to design enzymes across all functional families. Our key idea is to generate an enzyme's amino acid sequence and the three-dimensional (3D) coordinates of its residues based on functionally important sites and substrates corresponding to a desired catalytic function. These sites are automatically mined from enzyme databases. EnzyGen consists of a novel interleaving network of attention and neighborhood equivariant layers, which captures both long-range correlation in an entire protein sequence and local influence from the nearest amino acids in 3D space. To learn the generative model, we devise a joint training objective, including a sequence generation loss, a position prediction loss, and an enzyme-substrate interaction loss. We further construct EnzyBench, a dataset with 3157 enzyme families, covering all available enzymes within the Protein Data Bank (PDB). Experimental results show that EnzyGen consistently achieves the best performance across all 323 testing families, surpassing the best baseline by 10.79% in terms of substrate binding affinity. These findings demonstrate EnzyGen's superior capability in designing well-folded, effective enzymes that bind specific substrates with high affinity.
Submitted 17 July, 2024; v1 submitted 13 May, 2024; originally announced May 2024.
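The abstract names three loss terms; a direct, minimal instantiation of that joint objective looks like the following. The loss weights (all 1 here) and the form of the interaction term are illustrative assumptions.

```python
# Sketch of the joint objective: sequence CE + coordinate MSE + interaction loss.
import torch
import torch.nn.functional as F

def joint_loss(seq_logits, seq_true, xyz_pred, xyz_true, aff_pred, aff_true):
    l_seq = F.cross_entropy(seq_logits, seq_true)   # amino-acid identities
    l_pos = F.mse_loss(xyz_pred, xyz_true)          # 3D residue coordinates
    l_int = F.mse_loss(aff_pred, aff_true)          # enzyme-substrate interaction
    return l_seq + l_pos + l_int

loss = joint_loss(torch.randn(50, 20), torch.randint(0, 20, (50,)),
                  torch.randn(50, 3), torch.randn(50, 3),
                  torch.randn(1), torch.randn(1))
```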
arXiv:2405.06693 [pdf, other] q-bio.BM cs.LG (https://arxiv.org/abs/2405.06693)
SurfPro: Functional Protein Design Based on Continuous Surface
Authors: Zhenqiao Song, Tinglin Huang, Lei Li, Wengong Jin
Abstract: How can we design proteins with desired functions? We are motivated by the chemical intuition that both geometric structure and biochemical properties are critical to a protein's function. In this paper, we propose SurfPro, a new method to generate functional proteins given a desired surface and its associated biochemical properties. SurfPro comprises a hierarchical encoder that progressively models the geometric shape and biochemical features of a protein surface, and an autoregressive decoder that produces an amino acid sequence. We evaluate SurfPro on the standard inverse folding benchmark CATH 4.2 and two functional protein design tasks: protein binder design and enzyme design. SurfPro consistently surpasses previous state-of-the-art inverse folding methods, achieving a recovery rate of 57.78% on CATH 4.2 and higher success rates in terms of protein-protein binding and enzyme-substrate interaction scores.
Submitted 17 June, 2024; v1 submitted 7 May, 2024; originally announced May 2024.
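To make the encoder-decoder split concrete, here is a toy version of the decoder side only: greedy autoregressive generation of an amino-acid sequence conditioned on a pooled surface embedding. SurfPro's hierarchical surface encoder is stubbed as a fixed vector; everything here is a sketch, not the paper's model.

```python
# Minimal autoregressive decoder conditioned on a (stubbed) surface embedding.
import torch
import torch.nn as nn

class TinyDecoder(nn.Module):
    def __init__(self, hidden=32, vocab=21):  # 20 amino acids + start token
        super().__init__()
        self.embed = nn.Embedding(vocab, hidden)
        self.rnn = nn.GRUCell(hidden, hidden)
        self.out = nn.Linear(hidden, vocab)

    def generate(self, surface_embedding, length=10):
        h, tok, seq = surface_embedding, torch.tensor(20), []  # id 20 = start
        for _ in range(length):
            h = self.rnn(self.embed(tok).unsqueeze(0), h)
            tok = self.out(h).argmax(dim=1).squeeze()  # greedy next residue
            seq.append(int(tok))
        return seq

print(TinyDecoder().generate(torch.zeros(1, 32)))
```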
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16970v1-abstract-full').style.display = 'none'; document.getElementById('2404.16970v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13541">arXiv:2404.13541</a> <span> [<a href="https://arxiv.org/pdf/2404.13541">pdf</a>, <a href="https://arxiv.org/format/2404.13541">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Generalizable Novel-View Synthesis using a Stereo Camera </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+H">Haechan Lee</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wonjoon Jin</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+S">Seung-Hwan Baek</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+S">Sunghyun Cho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13541v1-abstract-short" style="display: inline;"> In this paper, we propose the first generalizable view synthesis approach that specifically targets multi-view stereo-camera images. Since recent stereo matching has demonstrated accurate geometry prediction, we introduce stereo matching into novel-view synthesis for high-quality geometry reconstruction. To this end, this paper proposes a novel framework, dubbed StereoNeRF, which integrates stereo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13541v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13541v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13541v1-abstract-full" style="display: none;"> In this paper, we propose the first generalizable view synthesis approach that specifically targets multi-view stereo-camera images. Since recent stereo matching has demonstrated accurate geometry prediction, we introduce stereo matching into novel-view synthesis for high-quality geometry reconstruction. To this end, this paper proposes a novel framework, dubbed StereoNeRF, which integrates stereo matching into a NeRF-based generalizable view synthesis approach. StereoNeRF is equipped with three key components to effectively exploit stereo matching in novel-view synthesis: a stereo feature extractor, a depth-guided plane-sweeping, and a stereo depth loss. Moreover, we propose the StereoNVS dataset, the first multi-view dataset of stereo-camera images, encompassing a wide variety of both real and synthetic scenes. Our experimental results demonstrate that StereoNeRF surpasses previous approaches in generalizable view synthesis. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13541v1-abstract-full').style.display = 'none'; document.getElementById('2404.13541v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR 2024. Project page URL: https://jinwonjoon.github.io/stereonerf/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.19852">arXiv:2403.19852</a> <span> [<a href="https://arxiv.org/pdf/2403.19852">pdf</a>, <a href="https://arxiv.org/format/2403.19852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> A Review of Graph Neural Networks in Epidemic Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zewen Liu</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+G">Guancheng Wan</a>, <a href="/search/cs?searchtype=author&query=Prakash%2C+B+A">B. Aditya Prakash</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+M+S+Y">Max S. Y. Lau</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+W">Wei Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.19852v4-abstract-short" style="display: inline;"> Since the onset of the COVID-19 pandemic, there has been a growing interest in studying epidemiological models. Traditional mechanistic models mathematically describe the transmission mechanisms of infectious diseases. However, they often suffer from limitations of oversimplified or fixed assumptions, which could cause sub-optimal predictive power and inefficiency in capturing complex relation inf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19852v4-abstract-full').style.display = 'inline'; document.getElementById('2403.19852v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.19852v4-abstract-full" style="display: none;"> Since the onset of the COVID-19 pandemic, there has been a growing interest in studying epidemiological models. Traditional mechanistic models mathematically describe the transmission mechanisms of infectious diseases. However, they often suffer from limitations of oversimplified or fixed assumptions, which could cause sub-optimal predictive power and inefficiency in capturing complex relation information. Consequently, Graph Neural Networks(GNNs) have emerged as a progressively popular tool in epidemic research. 
arXiv:2403.19852 [pdf, other] cs.LG cs.SI physics.soc-ph q-bio.PE (https://arxiv.org/abs/2403.19852)
A Review of Graph Neural Networks in Epidemic Modeling
Authors: Zewen Liu, Guancheng Wan, B. Aditya Prakash, Max S. Y. Lau, Wei Jin
Abstract: Since the onset of the COVID-19 pandemic, there has been growing interest in studying epidemiological models. Traditional mechanistic models mathematically describe the transmission mechanisms of infectious diseases, but they often suffer from oversimplified or fixed assumptions, which can cause sub-optimal predictive power and inefficiency in capturing complex relational information. Consequently, Graph Neural Networks (GNNs) have emerged as a progressively popular tool in epidemic research. In this paper, we endeavor to furnish a comprehensive review of GNNs in epidemic tasks and highlight potential future directions. To accomplish this objective, we introduce hierarchical taxonomies for both epidemic tasks and methodologies, offering a trajectory of development within this domain. For epidemic tasks, we establish a taxonomy akin to those typically employed within the epidemic domain. For methodology, we categorize existing work into Neural Models and Hybrid Models. Following this, we perform an exhaustive and systematic examination of the methodologies, encompassing both the tasks and their technical details. Furthermore, we discuss the limitations of existing methods from diverse perspectives and systematically propose future research directions. This survey aims to bridge literature gaps and promote the progression of this promising field, with a list of relevant papers at https://github.com/Emory-Melody/awesome-epidemic-modeling-papers. We hope that it will facilitate synergies between the communities of GNNs and epidemiology and contribute to their collective progress.
Submitted 9 September, 2024; v1 submitted 28 March, 2024; originally announced March 2024.
arXiv:2403.07932 [pdf, other] cs.GT cs.AI (https://arxiv.org/abs/2403.07932)
Feint in Multi-Player Games
Authors: Junyu Liu, Wangkai Jin, Xiangjun Peng
Abstract: This paper introduces the first formalization, implementation, and quantitative evaluation of Feint in Multi-Player Games. Our work first formalizes Feint from the perspective of Multi-Player Games, in terms of temporal and spatial impacts and their collective effects. The formalization is built upon the Non-transitive Active Markov Game Model, where Feint can have considerable impact. Our work then considers practical implementation details of Feint in Multi-Player Games, under the state-of-the-art progress of multi-agent modeling to date (namely Multi-Agent Reinforcement Learning). Finally, our work quantitatively examines the effectiveness of the design, and the results show that our design of Feint can (1) greatly improve reward gains from the game; (2) significantly improve the diversity of Multi-Player Games; and (3) incur only negligible overhead in terms of time consumption. We conclude that our design of Feint is effective and practical in making Multi-Player Games more interesting.
Submitted 3 March, 2024; originally announced March 2024.
arXiv:2403.07931 [pdf, other] cs.GT cs.GR (https://arxiv.org/abs/2403.07931)
Formalizing Feint Actions, and Example Studies in Two-Player Games
Authors: Junyu Liu, Wangkai Jin, Xiangjun Peng
Abstract: Feint actions refer to a set of deceptive actions that enable players to obtain temporal advantages over their opponents. Such actions are a widely used tactic in most non-deterministic Two-Player Games (e.g., boxing and fencing). However, existing literature does not provide a comprehensive and concrete formalization of Feint actions or their implications for Two-Player Games. We argue that a full exploration of Feint actions is of great importance towards more realistic Two-Player Games. In this paper, we provide the first comprehensive and concrete formalization of Feint actions. The key idea of our work is to (1) allow automatic generation of Feint actions via our proposed Palindrome-directed Generation of Feint actions, and (2) provide concrete principles to properly combine Feint and attack actions. Based on our formalization, we also explore the implications for the game strategy model and provide optimizations to better incorporate Feint actions. Our experimental results show that accounting for Feint actions in Non-Deterministic Games (1) brings overall benefits to the game design and (2) benefits both game animations and strategy designs, introducing a desirable degree of randomness into game models that demand it.
Submitted 3 March, 2024; originally announced March 2024.
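One loose reading of "Palindrome-directed Generation" is that a feint plays a prefix of a real attack animation and then mirrors it back to the neutral pose, so the opening looks identical to a committed attack. This reading is an assumption; the paper's formal construction is not reproduced here.

```python
# Hedged illustration: build a feint by mirroring an attack-animation prefix.
def palindrome_feint(attack_frames, depth):
    prefix = attack_frames[:depth]
    return prefix + prefix[::-1]        # advance, then retract symmetrically

attack = ["step", "raise", "thrust", "recover"]
print(palindrome_feint(attack, 2))      # ['step', 'raise', 'raise', 'step']
```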
arXiv:2402.18934 [pdf, other] cs.RO (https://arxiv.org/abs/2402.18934)
RELEAD: Resilient Localization with Enhanced LiDAR Odometry in Adverse Environments
Authors: Zhiqiang Chen, Hongbo Chen, Yuhua Qi, Shipeng Zhong, Dapeng Feng, Wu Jin, Weisong Wen, Ming Liu
Abstract: LiDAR-based localization is valuable for applications like mining surveys and underground facility maintenance. However, existing methods can struggle when dealing with uninformative geometric structures in challenging scenarios. This paper presents RELEAD, a LiDAR-centric solution designed to address scan-matching degradation. Our method enables degeneracy-free point cloud registration by solving constrained ESIKF updates in the front end and incorporates multisensor constraints, even when dealing with outlier measurements, through graph optimization based on Graduated Non-Convexity (GNC). Additionally, we propose a robust Incremental Fixed Lag Smoother (rIFL) for efficient GNC-based optimization. RELEAD has undergone extensive evaluation in degenerate scenarios and has outperformed existing state-of-the-art LiDAR-Inertial odometry and LiDAR-Visual-Inertial odometry methods.
Submitted 15 March, 2024; v1 submitted 29 February, 2024; originally announced February 2024.
Journal ref: published in ICRA 2024
arXiv:2402.18127 [pdf, other] cs.LG (https://arxiv.org/abs/2402.18127)
Hierarchical Multi-Relational Graph Representation Learning for Large-Scale Prediction of Drug-Drug Interactions
Authors: Mengying Jiang, Guizhong Liu, Yuanchao Su, Weiqiang Jin, Biao Zhao
Abstract: Most existing methods for predicting drug-drug interactions (DDI) concentrate predominantly on capturing the explicit relationships among drugs, overlooking the valuable implicit correlations between drug pairs (DPs), which leads to weak predictions. To address this issue, this paper introduces a hierarchical multi-relational graph representation learning (HMGRL) approach. Within the HMGRL framework, we leverage a wealth of drug-related heterogeneous data sources to construct heterogeneous graphs, where nodes represent drugs and edges denote clear and various associations. A relational graph convolutional network (RGCN) is employed to capture diverse explicit relationships between drugs from these heterogeneous graphs. Additionally, a multi-view differentiable spectral clustering (MVDSC) module is developed to capture multiple valuable implicit correlations between DPs. Within the MVDSC, we use multiple DP features to construct graphs, where nodes represent DPs and edges denote different implicit correlations. Multiple DP representations are then generated through graph cutting, each emphasizing distinct implicit correlations. The graph-cutting strategy enables HMGRL to identify strongly connected communities of graphs, thereby reducing the fusion of irrelevant features. By combining every representation view of a DP, we create high-level DP representations for predicting DDIs. Two real-world datasets spanning three distinct tasks are adopted to gauge the efficacy of HMGRL. Experimental results indicate that HMGRL surpasses several leading-edge methods in performance.
Submitted 28 February, 2024; originally announced February 2024.
Comments: 14 pages, 10 figures
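The RGCN building block named in the abstract is standard: one weight matrix per relation type, aggregation over that relation's neighbors, plus a self-loop term. A bare-bones dense version, with toy sizes, looks like this.

```python
# Minimal relational GCN layer: per-relation weights + mean aggregation.
import torch
import torch.nn as nn

class TinyRGCN(nn.Module):
    def __init__(self, n_rel, dim):
        super().__init__()
        self.rel = nn.ModuleList([nn.Linear(dim, dim, bias=False) for _ in range(n_rel)])
        self.self_loop = nn.Linear(dim, dim, bias=False)

    def forward(self, h, adjs):                    # adjs: one (N, N) matrix per relation
        out = self.self_loop(h)
        for lin, a in zip(self.rel, adjs):
            deg = a.sum(dim=1, keepdim=True).clamp(min=1)
            out = out + lin((a @ h) / deg)         # mean over relation-r neighbors
        return torch.relu(out)

h = torch.randn(5, 16)                             # 5 drugs, 16-dim features
adjs = [torch.bernoulli(torch.full((5, 5), 0.3)) for _ in range(3)]
print(TinyRGCN(3, 16)(h, adjs).shape)              # torch.Size([5, 16])
```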
arXiv:2402.16181 [pdf, other] cs.LG cs.AI (https://arxiv.org/abs/2402.16181)
How Can LLM Guide RL? A Value-Based Approach
Authors: Shenao Zhang, Sirui Zheng, Shuqi Ke, Zhihan Liu, Wanxin Jin, Jianbo Yuan, Yingxiang Yang, Hongxia Yang, Zhaoran Wang
Abstract: Reinforcement learning (RL) has become the de facto standard practice for sequential decision-making problems by improving future acting policies with feedback. However, RL algorithms may require extensive trial-and-error interactions to collect useful feedback for improvement. On the other hand, recent developments in large language models (LLMs) have showcased impressive capabilities in language understanding and generation, yet they fall short in exploration and self-improvement for planning tasks, lacking the ability to autonomously refine their responses based on feedback. Therefore, in this paper, we study how the policy prior provided by an LLM can enhance the sample efficiency of RL algorithms. Specifically, we develop an algorithm named LINVIT that incorporates LLM guidance as a regularization factor in value-based RL, leading to significant reductions in the amount of data needed for learning. The reduction is most pronounced when the gap between the ideal policy and the LLM-informed policy is small, since the initial policy is then close to optimal and little further exploration is needed. Additionally, we present a practical algorithm, SLINVIT, that simplifies the construction of the value function and employs subgoals to reduce search complexity. Our experiments across three interactive environments, ALFWorld, InterCode, and BlocksWorld, demonstrate that our method achieves state-of-the-art success rates and surpasses previous RL and LLM approaches in terms of sample efficiency. Our code is available at https://github.com/agentification/Language-Integrated-VI.
Submitted 25 February, 2024; originally announced February 2024.
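A toy picture of "LLM guidance as a regularization factor in value-based RL": add a log-prior bonus from the LLM-suggested action distribution to the Q-values, so actions the prior favors need less exploration to be selected. The environment, prior, and weight lam below are stand-ins, not LINVIT itself.

```python
# Hedged sketch: action selection regularized toward an LLM policy prior.
import numpy as np

def regularized_greedy(Q, prior, lam=1.0):
    # argmax over actions of Q(s, a) + lam * log prior(a | s), per state
    return np.argmax(Q + lam * np.log(prior + 1e-8), axis=1)

Q = np.array([[0.0, 0.1], [0.5, 0.4]])      # 2 states x 2 actions
prior = np.array([[0.9, 0.1], [0.2, 0.8]])  # LLM-suggested action probabilities
print(regularized_greedy(Q, prior))          # prior overrides a near-tie: [0 1]
```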