Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 510 results for author: <span class="mathjax">Luo, H</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Luo%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Luo, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Luo%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Luo, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02414">arXiv:2502.02414</a> <span> [<a href="https://arxiv.org/pdf/2502.02414">pdf</a>, <a href="https://arxiv.org/format/2502.02414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Transolver++: An Accurate Neural Solver for PDEs on Million-Scale Geometries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huakun Luo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haixu Wu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hang Zhou</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+L">Lanxiang Xing</a>, <a href="/search/cs?searchtype=author&query=Di%2C+Y">Yichen Di</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianmin Wang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+M">Mingsheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02414v2-abstract-short" style="display: inline;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'inline'; document.getElementById('2502.02414v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02414v2-abstract-full" style="display: none;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a highly parallel and efficient neural solver that can accurately solve PDEs on million-scale geometries. Building upon previous advancements in solving PDEs by learning physical states via Transolver, Transolver++ is further equipped with an extremely optimized parallelism framework and a local adaptive mechanism to efficiently capture eidetic physical states from massive mesh points, successfully tackling the thorny challenges in computation and physics learning when scaling up input mesh size. Transolver++ increases the single-GPU input capacity to million-scale points for the first time and is capable of continuously scaling input size in linear complexity by increasing GPUs. Experimentally, Transolver++ yields 13% relative promotion across six standard PDE benchmarks and achieves over 20% performance gain in million-scale high-fidelity industrial simulations, whose sizes are 100$\times$ larger than previous benchmarks, covering car and 3D aircraft designs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'none'; document.getElementById('2502.02414v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02021">arXiv:2502.02021</a> <span> [<a href="https://arxiv.org/pdf/2502.02021">pdf</a>, <a href="https://arxiv.org/format/2502.02021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Multi-illuminant Color Constancy via Multi-scale Illuminant Estimation and Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hang Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rongwei Li</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jinxing Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02021v1-abstract-short" style="display: inline;"> Multi-illuminant color constancy methods aim to eliminate local color casts within an image through pixel-wise illuminant estimation. Existing methods mainly employ deep learning to establish a direct mapping between an image and its illumination map, which neglects the impact of image scales. To alleviate this problem, we represent an illuminant map as the linear combination of components estimat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02021v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02021v1-abstract-full" style="display: none;"> Multi-illuminant color constancy methods aim to eliminate local color casts within an image through pixel-wise illuminant estimation. Existing methods mainly employ deep learning to establish a direct mapping between an image and its illumination map, which neglects the impact of image scales. To alleviate this problem, we represent an illuminant map as the linear combination of components estimated from multi-scale images. Furthermore, we propose a tri-branch convolution networks to estimate multi-grained illuminant distribution maps from multi-scale images. These multi-grained illuminant maps are merged adaptively with an attentional illuminant fusion module. Through comprehensive experimental analysis and evaluation, the results demonstrate the effectiveness of our method, and it has achieved state-of-the-art performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02021v1-abstract-full').style.display = 'none'; document.getElementById('2502.02021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures, this manuscript is under the consideration of Optics Express</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00695">arXiv:2502.00695</a> <span> [<a href="https://arxiv.org/pdf/2502.00695">pdf</a>, <a href="https://arxiv.org/format/2502.00695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TMI-CLNet: Triple-Modal Interaction Network for Chronic Liver Disease Prognosis From Imaging, Clinical, and Radiomic Data Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+L">Linglong Wu</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+X">Xuhao Shan</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+R">Ruiquan Ge</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+R">Ruoyu Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chi Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yonghong Li</a>, <a href="/search/cs?searchtype=author&query=Elazab%2C+A">Ahmed Elazab</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huoling Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yunbi Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changmiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00695v1-abstract-short" style="display: inline;"> Chronic liver disease represents a significant health challenge worldwide and accurate prognostic evaluations are essential for personalized treatment plans. Recent evidence suggests that integrating multimodal data, such as computed tomography imaging, radiomic features, and clinical information, can provide more comprehensive prognostic information. However, modalities have an inherent heterogen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00695v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00695v1-abstract-full" style="display: none;"> Chronic liver disease represents a significant health challenge worldwide and accurate prognostic evaluations are essential for personalized treatment plans. Recent evidence suggests that integrating multimodal data, such as computed tomography imaging, radiomic features, and clinical information, can provide more comprehensive prognostic information. However, modalities have an inherent heterogeneity, and incorporating additional modalities may exacerbate the challenges of heterogeneous data fusion. Moreover, existing multimodal fusion methods often struggle to adapt to richer medical modalities, making it difficult to capture inter-modal relationships. To overcome these limitations, We present the Triple-Modal Interaction Chronic Liver Network (TMI-CLNet). Specifically, we develop an Intra-Modality Aggregation module and a Triple-Modal Cross-Attention Fusion module, which are designed to eliminate intra-modality redundancy and extract cross-modal information, respectively. Furthermore, we design a Triple-Modal Feature Fusion loss function to align feature representations across modalities. Extensive experiments on the liver prognosis dataset demonstrate that our approach significantly outperforms existing state-of-the-art unimodal models and other multi-modal techniques. Our code is available at https://github.com/Mysterwll/liver.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00695v1-abstract-full').style.display = 'none'; document.getElementById('2502.00695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, accepted by IEEE ISBI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00354">arXiv:2502.00354</a> <span> [<a href="https://arxiv.org/pdf/2502.00354">pdf</a>, <a href="https://arxiv.org/format/2502.00354">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3696410.3714561">10.1145/3696410.3714561 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PM-MOE: Mixture of Experts on Private Model Parameters for Personalized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yu Feng</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+Y">Yangli-ao Geng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Zongfu Han</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xie Yu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+K">Kaiwen Xue</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengyang Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guangwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00354v1-abstract-short" style="display: inline;"> Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. Personalized federated learning addresses this issue by dividing the model into a globally shared part and a locally private part, with the local m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00354v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00354v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00354v1-abstract-full" style="display: none;"> Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. Personalized federated learning addresses this issue by dividing the model into a globally shared part and a locally private part, with the local model correcting representation biases introduced by the global model. Nevertheless, locally converged parameters more accurately capture domain-specific knowledge, and current methods overlook the potential benefits of these parameters. To address these limitations, we propose PM-MoE architecture. This architecture integrates a mixture of personalized modules and an energy-based personalized modules denoising, enabling each client to select beneficial personalized parameters from other clients. We applied the PM-MoE architecture to nine recent model-split-based personalized federated learning algorithms, achieving performance improvements with minimal additional training. Extensive experiments on six widely adopted datasets and two heterogeneity settings validate the effectiveness of our approach. The source code is available at \url{https://github.com/dannis97500/PM-MOE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00354v1-abstract-full').style.display = 'none'; document.getElementById('2502.00354v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18922">arXiv:2501.18922</a> <span> [<a href="https://arxiv.org/pdf/2501.18922">pdf</a>, <a href="https://arxiv.org/format/2501.18922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> KBQA-o1: Agentic Knowledge Base Question Answering with Monte Carlo Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=E%2C+H">Haihong E</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yikai Guo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qika Lin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaobao Wu</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+X">Xinyu Mu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenhao Liu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Tuan%2C+L+A">Luu Anh Tuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18922v1-abstract-short" style="display: inline;"> Knowledge Base Question Answering (KBQA) aims to answer natural language questions with a large-scale structured knowledge base (KB). Despite advancements with large language models (LLMs), KBQA still faces challenges in weak KB awareness, imbalance between effectiveness and efficiency, and high reliance on annotated data. To address these challenges, we propose KBQA-o1, a novel agentic KBQA metho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18922v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18922v1-abstract-full" style="display: none;"> Knowledge Base Question Answering (KBQA) aims to answer natural language questions with a large-scale structured knowledge base (KB). Despite advancements with large language models (LLMs), KBQA still faces challenges in weak KB awareness, imbalance between effectiveness and efficiency, and high reliance on annotated data. To address these challenges, we propose KBQA-o1, a novel agentic KBQA method with Monte Carlo Tree Search (MCTS). It introduces a ReAct-based agent process for stepwise logical form generation with KB environment exploration. Moreover, it employs MCTS, a heuristic search method driven by policy and reward models, to balance agentic exploration's performance and search space. With heuristic exploration, KBQA-o1 generates high-quality annotations for further improvement by incremental fine-tuning. Experimental results show that KBQA-o1 outperforms previous low-resource KBQA methods with limited annotated data, boosting Llama-3.1-8B model's GrailQA F1 performance to 78.5% compared to 48.5% of the previous sota method with GPT-3.5-turbo. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18922v1-abstract-full').style.display = 'none'; document.getElementById('2501.18922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18100">arXiv:2501.18100</a> <span> [<a href="https://arxiv.org/pdf/2501.18100">pdf</a>, <a href="https://arxiv.org/format/2501.18100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Panacea: Mitigating Harmful Fine-tuning for Large Language Models via Post-fine-tuning Perturbation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tiansheng Huang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+H">Huanjin Yao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haotian Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Rui Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+N">Naiqiang Tan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiaxing Huang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18100v1-abstract-short" style="display: inline;"> Harmful fine-tuning attack introduces significant security risks to the fine-tuning services. Mainstream defenses aim to vaccinate the model such that the later harmful fine-tuning attack is less effective. However, our evaluation results show that such defenses are fragile -- with a few fine-tuning steps, the model still can learn the harmful knowledge. To this end, we do further experiment and f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18100v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18100v1-abstract-full" style="display: none;"> Harmful fine-tuning attack introduces significant security risks to the fine-tuning services. Mainstream defenses aim to vaccinate the model such that the later harmful fine-tuning attack is less effective. However, our evaluation results show that such defenses are fragile -- with a few fine-tuning steps, the model still can learn the harmful knowledge. To this end, we do further experiment and find that an embarrassingly simple solution -- adding purely random perturbations to the fine-tuned model, can recover the model from harmful behavior, though it leads to a degradation in the model's fine-tuning performance. To address the degradation of fine-tuning performance, we further propose Panacea, which optimizes an adaptive perturbation that will be applied to the model after fine-tuning. Panacea maintains model's safety alignment performance without compromising downstream fine-tuning performance. Comprehensive experiments are conducted on different harmful ratios, fine-tuning tasks and mainstream LLMs, where the average harmful scores are reduced by up-to 21.5%, while maintaining fine-tuning performance. As a by-product, we analyze the optimized perturbation and show that different layers in various LLMs have distinct safety coefficients. Source code available at https://github.com/w-yibo/Panacea <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18100v1-abstract-full').style.display = 'none'; document.getElementById('2501.18100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14892">arXiv:2501.14892</a> <span> [<a href="https://arxiv.org/pdf/2501.14892">pdf</a>, <a href="https://arxiv.org/format/2501.14892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Causal Graphs Meet Thoughts: Enhancing Complex Reasoning in Graph-Augmented LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hang Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chujun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14892v1-abstract-short" style="display: inline;"> In knowledge-intensive tasks, especially in high-stakes domains like medicine and law, it is critical not only to retrieve relevant information but also to provide causal reasoning and explainability. Large language models (LLMs) have achieved remarkable performance in natural language understanding and generation tasks. However, they often suffer from limitations such as difficulty in incorporati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14892v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14892v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14892v1-abstract-full" style="display: none;"> In knowledge-intensive tasks, especially in high-stakes domains like medicine and law, it is critical not only to retrieve relevant information but also to provide causal reasoning and explainability. Large language models (LLMs) have achieved remarkable performance in natural language understanding and generation tasks. However, they often suffer from limitations such as difficulty in incorporating new knowledge, generating hallucinations, and explaining their reasoning process. To address these challenges, integrating knowledge graphs with Graph Retrieval-Augmented Generation (Graph RAG) has emerged as an effective solution. Traditional Graph RAG methods often rely on simple graph traversal or semantic similarity, which do not capture causal relationships or align well with the model's internal reasoning steps. This paper proposes a novel pipeline that filters large knowledge graphs to emphasize cause-effect edges, aligns the retrieval process with the model's chain-of-thought (CoT), and enhances reasoning through multi-stage path improvements. Experiments on medical question-answering tasks show consistent gains, with up to a 10\% absolute improvement across multiple large language models (LLMs). This approach demonstrates the value of combining causal reasoning with stepwise retrieval, leading to more interpretable and logically grounded solutions for complex queries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14892v1-abstract-full').style.display = 'none'; document.getElementById('2501.14892v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 3 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12666">arXiv:2501.12666</a> <span> [<a href="https://arxiv.org/pdf/2501.12666">pdf</a>, <a href="https://arxiv.org/format/2501.12666">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explicit Eigenvalue Regularization Improves Sharpness-Aware Minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haocheng Luo</a>, <a href="/search/cs?searchtype=author&query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T">Tung Pham</a>, <a href="/search/cs?searchtype=author&query=Harandi%2C+M">Mehrtash Harandi</a>, <a href="/search/cs?searchtype=author&query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12666v1-abstract-short" style="display: inline;"> Sharpness-Aware Minimization (SAM) has attracted significant attention for its effectiveness in improving generalization across various tasks. However, its underlying principles remain poorly understood. In this work, we analyze SAM's training dynamics using the maximum eigenvalue of the Hessian as a measure of sharpness, and propose a third-order stochastic differential equation (SDE), which reve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12666v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12666v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12666v1-abstract-full" style="display: none;"> Sharpness-Aware Minimization (SAM) has attracted significant attention for its effectiveness in improving generalization across various tasks. However, its underlying principles remain poorly understood. In this work, we analyze SAM's training dynamics using the maximum eigenvalue of the Hessian as a measure of sharpness, and propose a third-order stochastic differential equation (SDE), which reveals that the dynamics are driven by a complex mixture of second- and third-order terms. We show that alignment between the perturbation vector and the top eigenvector is crucial for SAM's effectiveness in regularizing sharpness, but find that this alignment is often inadequate in practice, limiting SAM's efficiency. Building on these insights, we introduce Eigen-SAM, an algorithm that explicitly aims to regularize the top Hessian eigenvalue by aligning the perturbation vector with the leading eigenvector. We validate the effectiveness of our theory and the practical advantages of our proposed approach through comprehensive experiments. Code is available at https://github.com/RitianLuo/EigenSAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12666v1-abstract-full').style.display = 'none'; document.getElementById('2501.12666v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12570">arXiv:2501.12570</a> <span> [<a href="https://arxiv.org/pdf/2501.12570">pdf</a>, <a href="https://arxiv.org/format/2501.12570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> O1-Pruner: Length-Harmonizing Fine-Tuning for O1-Like Reasoning Pruning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haotian Luo</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Haiying He</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shiwei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+N">Naiqiang Tan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+X">Xiaochun Cao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12570v2-abstract-short" style="display: inline;"> Recently, long-thought reasoning LLMs, such as OpenAI's O1, adopt extended reasoning processes similar to how humans ponder over complex problems. This reasoning paradigm significantly enhances the model's problem-solving abilities and has achieved promising results. However, long-thought reasoning process leads to a substantial increase in inference time. A pressing challenge is reducing the infe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12570v2-abstract-full').style.display = 'inline'; document.getElementById('2501.12570v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12570v2-abstract-full" style="display: none;"> Recently, long-thought reasoning LLMs, such as OpenAI's O1, adopt extended reasoning processes similar to how humans ponder over complex problems. This reasoning paradigm significantly enhances the model's problem-solving abilities and has achieved promising results. However, long-thought reasoning process leads to a substantial increase in inference time. A pressing challenge is reducing the inference overhead of long-thought LLMs while ensuring accuracy. In this paper, we experimentally demonstrate that long-thought reasoning models struggle to effectively allocate token budgets based on problem difficulty and reasoning redundancies. To address this, we propose Length-Harmonizing Fine-Tuning (O1-Pruner), aiming at minimizing reasoning overhead while maintaining accuracy. This effective fine-tuning method first estimates the LLM's baseline performance through pre-sampling and then uses RL-style fine-tuning to encourage the model to generate shorter reasoning processes under accuracy constraints. This allows the model to achieve efficient reasoning with lower redundancy while maintaining accuracy. Experiments on various mathematical reasoning benchmarks show that O1-Pruner not only significantly reduces inference overhead but also achieves higher accuracy, providing a novel and promising solution to this challenge. Our code is coming soon at https://github.com/StarDewXXX/O1-Pruner <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12570v2-abstract-full').style.display = 'none'; document.getElementById('2501.12570v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10935">arXiv:2501.10935</a> <span> [<a href="https://arxiv.org/pdf/2501.10935">pdf</a>, <a href="https://arxiv.org/format/2501.10935">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TSVC:Tripartite Learning with Semantic Variation Consistency for Robust Image-Text Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Shuai Lyu</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zijing Tian</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+Z">Zhonghong Ou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Ha%2C+Q">Qiankun Ha</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10935v1-abstract-short" style="display: inline;"> Cross-modal retrieval maps data under different modality via semantic relevance. Existing approaches implicitly assume that data pairs are well-aligned and ignore the widely existing annotation noise, i.e., noisy correspondence (NC). Consequently, it inevitably causes performance degradation. Despite attempts that employ the co-teaching paradigm with identical architectures to provide distinct dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10935v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10935v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10935v1-abstract-full" style="display: none;"> Cross-modal retrieval maps data under different modality via semantic relevance. Existing approaches implicitly assume that data pairs are well-aligned and ignore the widely existing annotation noise, i.e., noisy correspondence (NC). Consequently, it inevitably causes performance degradation. Despite attempts that employ the co-teaching paradigm with identical architectures to provide distinct data perspectives, the differences between these architectures are primarily stemmed from random initialization. Thus, the model becomes increasingly homogeneous along with the training process. Consequently, the additional information brought by this paradigm is severely limited. In order to resolve this problem, we introduce a Tripartite learning with Semantic Variation Consistency (TSVC) for robust image-text retrieval. We design a tripartite cooperative learning mechanism comprising a Coordinator, a Master, and an Assistant model. The Coordinator distributes data, and the Assistant model supports the Master model's noisy label prediction with diverse data. Moreover, we introduce a soft label estimation method based on mutual information variation, which quantifies the noise in new samples and assigns corresponding soft labels. We also present a new loss function to enhance robustness and optimize training effectiveness. Extensive experiments on three widely used datasets demonstrate that, even at increasing noise ratios, TSVC exhibits significant advantages in retrieval accuracy and maintains stable training performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10935v1-abstract-full').style.display = 'none'; document.getElementById('2501.10935v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted to the Main Track of AAAI 2025. It contains 9 pages, 7 figures, and is relevant to the areas of cross-modal retrieval and machine learning. The work presents a novel approach in robust image-text retrieval using a tripartite learning framework</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10131">arXiv:2501.10131</a> <span> [<a href="https://arxiv.org/pdf/2501.10131">pdf</a>, <a href="https://arxiv.org/format/2501.10131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ACE: Anatomically Consistent Embeddings in Composition and Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Ziyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haozhe Luo</a>, <a href="/search/cs?searchtype=author&query=Taher%2C+M+R+H">Mohammad Reza Hosseinzadeh Taher</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jiaxuan Pang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+X">Xiaowei Ding</a>, <a href="/search/cs?searchtype=author&query=Gotway%2C+M">Michael Gotway</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jianming Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10131v1-abstract-short" style="display: inline;"> Medical images acquired from standardized protocols show consistent macroscopic or microscopic anatomical structures, and these structures consist of composable/decomposable organs and tissues, but existing self-supervised learning (SSL) methods do not appreciate such composable/decomposable structure attributes inherent to medical images. To overcome this limitation, this paper introduces a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10131v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10131v1-abstract-full" style="display: none;"> Medical images acquired from standardized protocols show consistent macroscopic or microscopic anatomical structures, and these structures consist of composable/decomposable organs and tissues, but existing self-supervised learning (SSL) methods do not appreciate such composable/decomposable structure attributes inherent to medical images. To overcome this limitation, this paper introduces a novel SSL approach called ACE to learn anatomically consistent embedding via composition and decomposition with two key branches: (1) global consistency, capturing discriminative macro-structures via extracting global features; (2) local consistency, learning fine-grained anatomical details from composable/decomposable patch features via corresponding matrix matching. Experimental results across 6 datasets 2 backbones, evaluated in few-shot learning, fine-tuning, and property analysis, show ACE's superior robustness, transferability, and clinical potential. The innovations of our ACE lie in grid-wise image cropping, leveraging the intrinsic properties of compositionality and decompositionality of medical images, bridging the semantic gap from high-level pathologies to low-level tissue anomalies, and providing a new SSL method for medical imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10131v1-abstract-full').style.display = 'none'; document.getElementById('2501.10131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WACV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06282">arXiv:2501.06282</a> <span> [<a href="https://arxiv.org/pdf/2501.06282">pdf</a>, <a href="https://arxiv.org/format/2501.06282">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MinMo: A Multimodal Large Language Model for Seamless Voice Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yanni Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mengzhe Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yingda Chen</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+C">Chong Deng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zhihao Du</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ruize Gao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Changfeng Gao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yabin Li</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+X">Xiang Lv</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaqing Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoneng Luo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+B">Bin Ma</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+C">Chongjia Ni</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xian Shi</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jialong Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hui Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wen Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunlan Xu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+F">Fan Yu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zhijie Yan</a> , et al. (11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06282v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) and multimodal speech-text models have laid the groundwork for seamless voice interactions, enabling real-time, natural, and human-like conversations. Previous models for voice interactions are categorized as native and aligned. Native models integrate speech and text processing in one framework but struggle with issues like differing sequence le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06282v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06282v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06282v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) and multimodal speech-text models have laid the groundwork for seamless voice interactions, enabling real-time, natural, and human-like conversations. Previous models for voice interactions are categorized as native and aligned. Native models integrate speech and text processing in one framework but struggle with issues like differing sequence lengths and insufficient pre-training. Aligned models maintain text LLM capabilities but are often limited by small datasets and a narrow focus on speech tasks. In this work, we introduce MinMo, a Multimodal Large Language Model with approximately 8B parameters for seamless voice interaction. We address the main limitations of prior aligned multimodal models. We train MinMo through multiple stages of speech-to-text alignment, text-to-speech alignment, speech-to-speech alignment, and duplex interaction alignment, on 1.4 million hours of diverse speech data and a broad range of speech tasks. After the multi-stage training, MinMo achieves state-of-the-art performance across various benchmarks for voice comprehension and generation while maintaining the capabilities of text LLMs, and also facilitates full-duplex conversation, that is, simultaneous two-way communication between the user and the system. Moreover, we propose a novel and simple voice decoder that outperforms prior models in voice generation. The enhanced instruction-following capabilities of MinMo supports controlling speech generation based on user instructions, with various nuances including emotions, dialects, and speaking rates, and mimicking specific voices. For MinMo, the speech-to-text latency is approximately 100ms, full-duplex latency is approximately 600ms in theory and 800ms in practice. The MinMo project web page is https://funaudiollm.github.io/minmo, and the code and models will be released soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06282v1-abstract-full').style.display = 'none'; document.getElementById('2501.06282v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress. Authors are listed in alphabetical order by family name</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02749">arXiv:2501.02749</a> <span> [<a href="https://arxiv.org/pdf/2501.02749">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Robot Route Optimization in Smart Logistics with Transformer and GNN Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jianjun Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shuchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+A">Ankai Liang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhongjin Xu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+R">Ruxue Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02749v1-abstract-short" style="display: inline;"> This research delves into advanced route optimization for robots in smart logistics, leveraging a fusion of Transformer architectures, Graph Neural Networks (GNNs), and Generative Adversarial Networks (GANs). The approach utilizes a graph-based representation encompassing geographical data, cargo allocation, and robot dynamics, addressing both spatial and resource limitations to refine route effic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02749v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02749v1-abstract-full" style="display: none;"> This research delves into advanced route optimization for robots in smart logistics, leveraging a fusion of Transformer architectures, Graph Neural Networks (GNNs), and Generative Adversarial Networks (GANs). The approach utilizes a graph-based representation encompassing geographical data, cargo allocation, and robot dynamics, addressing both spatial and resource limitations to refine route efficiency. Through extensive testing with authentic logistics datasets, the proposed method achieves notable improvements, including a 15% reduction in travel distance, a 20% boost in time efficiency, and a 10% decrease in energy consumption. These findings highlight the algorithm's effectiveness, promoting enhanced performance in intelligent logistics operations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02749v1-abstract-full').style.display = 'none'; document.getElementById('2501.02749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01427">arXiv:2501.01427</a> <span> [<a href="https://arxiv.org/pdf/2501.01427">pdf</a>, <a href="https://arxiv.org/format/2501.01427">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VideoAnydoor: High-fidelity Video Object Insertion with Precise Motion Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tu%2C+Y">Yuanpeng Tu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+S">Sihui Ji</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+X">Xiang Bai</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01427v3-abstract-short" style="display: inline;"> Despite significant advancements in video generation, inserting a given object into videos remains a challenging task. The difficulty lies in preserving the appearance details of the reference object and accurately modeling coherent motions at the same time. In this paper, we propose VideoAnydoor, a zero-shot video object insertion framework with high-fidelity detail preservation and precise motio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01427v3-abstract-full').style.display = 'inline'; document.getElementById('2501.01427v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01427v3-abstract-full" style="display: none;"> Despite significant advancements in video generation, inserting a given object into videos remains a challenging task. The difficulty lies in preserving the appearance details of the reference object and accurately modeling coherent motions at the same time. In this paper, we propose VideoAnydoor, a zero-shot video object insertion framework with high-fidelity detail preservation and precise motion control. Starting from a text-to-video model, we utilize an ID extractor to inject the global identity and leverage a box sequence to control the overall motion. To preserve the detailed appearance and meanwhile support fine-grained motion control, we design a pixel warper. It takes the reference image with arbitrary key-points and the corresponding key-point trajectories as inputs. It warps the pixel details according to the trajectories and fuses the warped features with the diffusion U-Net, thus improving detail preservation and supporting users in manipulating the motion trajectories. In addition, we propose a training strategy involving both videos and static images with a weighted loss to enhance insertion quality. VideoAnydoor demonstrates significant superiority over existing methods and naturally supports various downstream applications (e.g., talking head generation, video virtual try-on, multi-region editing) without task-specific fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01427v3-abstract-full').style.display = 'none'; document.getElementById('2501.01427v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://videoanydoor.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01425">arXiv:2501.01425</a> <span> [<a href="https://arxiv.org/pdf/2501.01425">pdf</a>, <a href="https://arxiv.org/format/2501.01425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Free-Form Motion Control: A Synthetic Video Generation Dataset with Controllable Camera and Object Motions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shuai%2C+X">Xincheng Shuai</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+H">Henghui Ding</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Z">Zhenyuan Qin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xingjun Ma</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01425v2-abstract-short" style="display: inline;"> Controlling the movements of dynamic objects and the camera within generated videos is a meaningful yet challenging task. Due to the lack of datasets with comprehensive motion annotations, existing algorithms can not simultaneously control the motions of both camera and objects, resulting in limited controllability over generated contents. To address this issue and facilitate the research in this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01425v2-abstract-full').style.display = 'inline'; document.getElementById('2501.01425v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01425v2-abstract-full" style="display: none;"> Controlling the movements of dynamic objects and the camera within generated videos is a meaningful yet challenging task. Due to the lack of datasets with comprehensive motion annotations, existing algorithms can not simultaneously control the motions of both camera and objects, resulting in limited controllability over generated contents. To address this issue and facilitate the research in this field, we introduce a Synthetic Dataset for Free-Form Motion Control (SynFMC). The proposed SynFMC dataset includes diverse objects and environments and covers various motion patterns according to specific rules, simulating common and complex real-world scenarios. The complete 6D pose information facilitates models learning to disentangle the motion effects from objects and the camera in a video. To validate the effectiveness and generalization of SynFMC, we further propose a method, Free-Form Motion Control (FMC). FMC enables independent or simultaneous control of object and camera movements, producing high-fidelity videos. Moreover, it is compatible with various personalized text-to-image (T2I) models for different content styles. Extensive experiments demonstrate that the proposed FMC outperforms previous methods across multiple scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01425v2-abstract-full').style.display = 'none'; document.getElementById('2501.01425v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://henghuiding.github.io/SynFMC/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18588">arXiv:2412.18588</a> <span> [<a href="https://arxiv.org/pdf/2412.18588">pdf</a>, <a href="https://arxiv.org/format/2412.18588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Paragraph is All It Takes: Rich Robot Behaviors from Interacting, Trusted LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=OpenMind"> OpenMind</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+S">Shaohong Zhong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Adam Zhou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Homin Luo</a>, <a href="/search/cs?searchtype=author&query=Liphardt%2C+J">Jan Liphardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18588v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are compact representations of all public knowledge of our physical environment and animal and human behaviors. The application of LLMs to robotics may offer a path to highly capable robots that perform well across most human tasks with limited or even zero tuning. Aside from increasingly sophisticated reasoning and task planning, networks of (suitably designed) LLMs o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18588v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18588v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18588v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are compact representations of all public knowledge of our physical environment and animal and human behaviors. The application of LLMs to robotics may offer a path to highly capable robots that perform well across most human tasks with limited or even zero tuning. Aside from increasingly sophisticated reasoning and task planning, networks of (suitably designed) LLMs offer ease of upgrading capabilities and allow humans to directly observe the robot's thinking. Here we explore the advantages, limitations, and particularities of using LLMs to control physical robots. The basic system consists of four LLMs communicating via a human language data bus implemented via web sockets and ROS2 message passing. Surprisingly, rich robot behaviors and good performance across different tasks could be achieved despite the robot's data fusion cycle running at only 1Hz and the central data bus running at the extremely limited rates of the human brain, of around 40 bits/s. The use of natural language for inter-LLM communication allowed the robot's reasoning and decision making to be directly observed by humans and made it trivial to bias the system's behavior with sets of rules written in plain English. These rules were immutably written into Ethereum, a global, public, and censorship resistant Turing-complete computer. We suggest that by using natural language as the data bus among interacting AIs, and immutable public ledgers to store behavior constraints, it is possible to build robots that combine unexpectedly rich performance, upgradability, and durable alignment with humans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18588v1-abstract-full').style.display = 'none'; document.getElementById('2412.18588v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18292">arXiv:2412.18292</a> <span> [<a href="https://arxiv.org/pdf/2412.18292">pdf</a>, <a href="https://arxiv.org/format/2412.18292">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Multi-Robot Semantic Navigation Through Multimodal Chain-of-Thought Score Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhixuan Shen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haonan Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kexun Chen</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+F">Fengmao Lv</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18292v1-abstract-short" style="display: inline;"> Understanding how humans cooperatively utilize semantic knowledge to explore unfamiliar environments and decide on navigation directions is critical for house service multi-robot systems. Previous methods primarily focused on single-robot centralized planning strategies, which severely limited exploration efficiency. Recent research has considered decentralized planning strategies for multiple rob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18292v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18292v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18292v1-abstract-full" style="display: none;"> Understanding how humans cooperatively utilize semantic knowledge to explore unfamiliar environments and decide on navigation directions is critical for house service multi-robot systems. Previous methods primarily focused on single-robot centralized planning strategies, which severely limited exploration efficiency. Recent research has considered decentralized planning strategies for multiple robots, assigning separate planning models to each robot, but these approaches often overlook communication costs. In this work, we propose Multimodal Chain-of-Thought Co-Navigation (MCoCoNav), a modular approach that utilizes multimodal Chain-of-Thought to plan collaborative semantic navigation for multiple robots. MCoCoNav combines visual perception with Vision Language Models (VLMs) to evaluate exploration value through probabilistic scoring, thus reducing time costs and achieving stable outputs. Additionally, a global semantic map is used as a communication bridge, minimizing communication overhead while integrating observational results. Guided by scores that reflect exploration trends, robots utilize this map to assess whether to explore new frontier points or revisit history nodes. Experiments on HM3D_v0.2 and MP3D demonstrate the effectiveness of our approach. Our code is available at https://github.com/FrankZxShen/MCoCoNav.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18292v1-abstract-full').style.display = 'none'; document.getElementById('2412.18292v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 10 figures, Extended Version of accepted AAAI 2025 Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17458">arXiv:2412.17458</a> <span> [<a href="https://arxiv.org/pdf/2412.17458">pdf</a>, <a href="https://arxiv.org/format/2412.17458">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCSVT.2024.3479887">10.1109/TCSVT.2024.3479887 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Progressive Boundary Guided Anomaly Synthesis for Industrial Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qiyu Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huiyuan Luo</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Han Gao</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+C">Chengkan Lv</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhengtao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17458v1-abstract-short" style="display: inline;"> Unsupervised anomaly detection methods can identify surface defects in industrial images by leveraging only normal samples for training. Due to the risk of overfitting when learning from a single class, anomaly synthesis strategies are introduced to enhance detection capability by generating artificial anomalies. However, existing strategies heavily rely on anomalous textures from auxiliary datase… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17458v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17458v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17458v1-abstract-full" style="display: none;"> Unsupervised anomaly detection methods can identify surface defects in industrial images by leveraging only normal samples for training. Due to the risk of overfitting when learning from a single class, anomaly synthesis strategies are introduced to enhance detection capability by generating artificial anomalies. However, existing strategies heavily rely on anomalous textures from auxiliary datasets. Moreover, their limitations in the coverage and directionality of anomaly synthesis may result in a failure to capture useful information and lead to significant redundancy. To address these issues, we propose a novel Progressive Boundary-guided Anomaly Synthesis (PBAS) strategy, which can directionally synthesize crucial feature-level anomalies without auxiliary textures. It consists of three core components: Approximate Boundary Learning (ABL), Anomaly Feature Synthesis (AFS), and Refined Boundary Optimization (RBO). To make the distribution of normal samples more compact, ABL first learns an approximate decision boundary by center constraint, which improves the center initialization through feature alignment. AFS then directionally synthesizes anomalies with more flexible scales guided by the hypersphere distribution of normal features. Since the boundary is so loose that it may contain real anomalies, RBO refines the decision boundary through the binary classification of artificial anomalies and normal features. Experimental results show that our method achieves state-of-the-art performance and the fastest detection speed on three widely used industrial datasets, including MVTec AD, VisA, and MPDD. The code will be available at: https://github.com/cqylunlun/PBAS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17458v1-abstract-full').style.display = 'none'; document.getElementById('2412.17458v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Circuits and Systems for Video Technology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15130">arXiv:2412.15130</a> <span> [<a href="https://arxiv.org/pdf/2412.15130">pdf</a>, <a href="https://arxiv.org/ps/2412.15130">ps</a>, <a href="https://arxiv.org/format/2412.15130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Continuous Flattening and Reversing of Convex Polyhedral Linkages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Demaine%2C+E+D">Erik D. Demaine</a>, <a href="/search/cs?searchtype=author&query=Demaine%2C+M+L">Martin L. Demaine</a>, <a href="/search/cs?searchtype=author&query=Hecher%2C+M">Markus Hecher</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+R">Rebecca Lin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+V+H">Victor H. Luo</a>, <a href="/search/cs?searchtype=author&query=Nara%2C+C">Chie Nara</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15130v1-abstract-short" style="display: inline;"> We prove two results about transforming any convex polyhedron, modeled as a linkage L of its edges. First, if we subdivide each edge of L in half, then L can be continuously flattened into a plane. Second, if L is equilateral and we again subdivide each edge in half, then L can be reversed, i.e., turned inside-out. A linear number of subdivisions is optimal up to constant factors, as we show (none… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15130v1-abstract-full').style.display = 'inline'; document.getElementById('2412.15130v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15130v1-abstract-full" style="display: none;"> We prove two results about transforming any convex polyhedron, modeled as a linkage L of its edges. First, if we subdivide each edge of L in half, then L can be continuously flattened into a plane. Second, if L is equilateral and we again subdivide each edge in half, then L can be reversed, i.e., turned inside-out. A linear number of subdivisions is optimal up to constant factors, as we show (nonequilateral) examples that require a linear number of subdivisions. For nonequilateral linkages, we show that more subdivisions can be required: even a tetrahedron can require an arbitrary number of subdivisions to reverse. For nonequilateral tetrahedra, we provide an algorithm that matches this lower bound up to constant factors: logarithmic in the aspect ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15130v1-abstract-full').style.display = 'none'; document.getElementById('2412.15130v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68R10; 68Q17; 68U05 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.2.2; F.2.2; I.3.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14168">arXiv:2412.14168</a> <span> [<a href="https://arxiv.org/pdf/2412.14168">pdf</a>, <a href="https://arxiv.org/format/2412.14168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FashionComposer: Compositional Fashion Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+S">Sihui Ji</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaogang Xu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14168v2-abstract-short" style="display: inline;"> We present FashionComposer for compositional fashion image generation. Unlike previous methods, FashionComposer is highly flexible. It takes multi-modal input (i.e., text prompt, parametric human model, garment image, and face image) and supports personalizing the appearance, pose, and figure of the human and assigning multiple garments in one pass. To achieve this, we first develop a universal fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14168v2-abstract-full').style.display = 'inline'; document.getElementById('2412.14168v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14168v2-abstract-full" style="display: none;"> We present FashionComposer for compositional fashion image generation. Unlike previous methods, FashionComposer is highly flexible. It takes multi-modal input (i.e., text prompt, parametric human model, garment image, and face image) and supports personalizing the appearance, pose, and figure of the human and assigning multiple garments in one pass. To achieve this, we first develop a universal framework capable of handling diverse input modalities. We construct scaled training data to enhance the model's robust compositional capabilities. To accommodate multiple reference images (garments and faces) seamlessly, we organize these references in a single image as an "asset library" and employ a reference UNet to extract appearance features. To inject the appearance features into the correct pixels in the generated result, we propose subject-binding attention. It binds the appearance features from different "assets" with the corresponding text features. In this way, the model could understand each asset according to their semantics, supporting arbitrary numbers and types of reference images. As a comprehensive solution, FashionComposer also supports many other applications like human album generation, diverse virtual try-on tasks, etc. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14168v2-abstract-full').style.display = 'none'; document.getElementById('2412.14168v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://sihuiji.github.io/FashionComposer-Page</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11905">arXiv:2412.11905</a> <span> [<a href="https://arxiv.org/pdf/2412.11905">pdf</a>, <a href="https://arxiv.org/format/2412.11905">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> One for Dozens: Adaptive REcommendation for All Domains with Counterfactual Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huishi Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yiwen Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yiqing Wu</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+F">Fuzhen Zhuang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Deqing Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11905v2-abstract-short" style="display: inline;"> Multi-domain recommendation (MDR) aims to enhance recommendation performance across various domains. However, real-world recommender systems in online platforms often need to handle dozens or even hundreds of domains, far exceeding the capabilities of traditional MDR algorithms, which typically focus on fewer than five domains. Key challenges include a substantial increase in parameter count, high… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11905v2-abstract-full').style.display = 'inline'; document.getElementById('2412.11905v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11905v2-abstract-full" style="display: none;"> Multi-domain recommendation (MDR) aims to enhance recommendation performance across various domains. However, real-world recommender systems in online platforms often need to handle dozens or even hundreds of domains, far exceeding the capabilities of traditional MDR algorithms, which typically focus on fewer than five domains. Key challenges include a substantial increase in parameter count, high maintenance costs, and intricate knowledge transfer patterns across domains. Furthermore, minor domains often suffer from data sparsity, leading to inadequate training in classical methods. To address these issues, we propose Adaptive REcommendation for All Domains with counterfactual augmentation (AREAD). AREAD employs a hierarchical structure with a limited number of expert networks at several layers, to effectively capture domain knowledge at different granularities. To adaptively capture the knowledge transfer pattern across domains, we generate and iteratively prune a hierarchical expert network selection mask for each domain during training. Additionally, counterfactual assumptions are used to augment data in minor domains, supporting their iterative mask pruning. Our experiments on two public datasets, each encompassing over twenty domains, demonstrate AREAD's effectiveness, especially in data-sparse domains. Source code is available at https://github.com/Chrissie-Law/AREAD-Multi-Domain-Recommendation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11905v2-abstract-full').style.display = 'none'; document.getElementById('2412.11905v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Extended version accepted by AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11394">arXiv:2412.11394</a> <span> [<a href="https://arxiv.org/pdf/2412.11394">pdf</a>, <a href="https://arxiv.org/ps/2412.11394">ps</a>, <a href="https://arxiv.org/format/2412.11394">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCSS.2022.3184818">10.1109/TCSS.2022.3184818 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Brain-Computer Interfaces: A Systematic Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+K">K. Xia</a>, <a href="/search/cs?searchtype=author&query=Duch%2C+W">W. Duch</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Y. Sun</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">K. Xu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+W">W. Fang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">H. Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Y. Zhang</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+D">D. Sang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">X. Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">F-Y Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">D. Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11394v1-abstract-short" style="display: inline;"> A brain-computer interface (BCI) establishes a direct communication pathway between the human brain and a computer. It has been widely used in medical diagnosis, rehabilitation, education, entertainment, etc. Most research so far focuses on making BCIs more accurate and reliable, but much less attention has been paid to their privacy. Developing a commercial BCI system usually requires close colla… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11394v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11394v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11394v1-abstract-full" style="display: none;"> A brain-computer interface (BCI) establishes a direct communication pathway between the human brain and a computer. It has been widely used in medical diagnosis, rehabilitation, education, entertainment, etc. Most research so far focuses on making BCIs more accurate and reliable, but much less attention has been paid to their privacy. Developing a commercial BCI system usually requires close collaborations among multiple organizations, e.g., hospitals, universities, and/or companies. Input data in BCIs, e.g., electroencephalogram (EEG), contain rich privacy information, and the developed machine learning model is usually proprietary. Data and model transmission among different parties may incur significant privacy threats, and hence privacy protection in BCIs must be considered. Unfortunately, there does not exist any contemporary and comprehensive review on privacy-preserving BCIs. This paper fills this gap, by describing potential privacy threats and protection strategies in BCIs. It also points out several challenges and future research directions in developing privacy-preserving BCIs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11394v1-abstract-full').style.display = 'none'; document.getElementById('2412.11394v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Trans. on Computational Social Systems, 10(5):2312-2324, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10853">arXiv:2412.10853</a> <span> [<a href="https://arxiv.org/pdf/2412.10853">pdf</a>, <a href="https://arxiv.org/format/2412.10853">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SEW: Self-calibration Enhanced Whole Slide Pathology Image Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoming Luo</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaotian Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengxuming Zhang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+J">Jiabin Xia</a>, <a href="/search/cs?searchtype=author&query=Jian%2C+Y">Yang Jian</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuning Sun</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+L">Liang Xue</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Mingli Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiuming Zhang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zunlei Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10853v2-abstract-short" style="display: inline;"> Pathology images are considered the ``gold standard" for cancer diagnosis and treatment, with gigapixel images providing extensive tissue and cellular information. Existing methods fail to simultaneously extract global structural and local detail features for comprehensive pathology image analysis efficiently. To address these limitations, we propose a self-calibration enhanced framework for whole… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10853v2-abstract-full').style.display = 'inline'; document.getElementById('2412.10853v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10853v2-abstract-full" style="display: none;"> Pathology images are considered the ``gold standard" for cancer diagnosis and treatment, with gigapixel images providing extensive tissue and cellular information. Existing methods fail to simultaneously extract global structural and local detail features for comprehensive pathology image analysis efficiently. To address these limitations, we propose a self-calibration enhanced framework for whole slide pathology image analysis, comprising three components: a global branch, a focus predictor, and a detailed branch. The global branch initially classifies using the pathological thumbnail, while the focus predictor identifies relevant regions for classification based on the last layer features of the global branch. The detailed extraction branch then assesses whether the magnified regions correspond to the lesion area. Finally, a feature consistency constraint between the global and detail branches ensures that the global branch focuses on the appropriate region and extracts sufficient discriminative features for final identification. These focused discriminative features prove invaluable for uncovering novel prognostic tumor markers from the perspective of feature cluster uniqueness and tissue spatial distribution. Extensive experiment results demonstrate that the proposed framework can rapidly deliver accurate and explainable results for pathological grading and prognosis tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10853v2-abstract-full').style.display = 'none'; document.getElementById('2412.10853v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10718">arXiv:2412.10718</a> <span> [<a href="https://arxiv.org/pdf/2412.10718">pdf</a>, <a href="https://arxiv.org/format/2412.10718">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Grid: Omni Visual Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+C">Cong Wan</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+X">Xiangyang Luo</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zijian Cai</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yiren Song</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yunlong Zhao</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yifan Bai</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yuhang He</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yihong Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10718v4-abstract-short" style="display: inline;"> Visual generation has witnessed remarkable progress in single-image tasks, yet extending these capabilities to temporal sequences remains challenging. Current approaches either build specialized video models from scratch with enormous computational costs or add separate motion modules to image generators, both requiring learning temporal dynamics anew. We observe that modern image generation model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10718v4-abstract-full').style.display = 'inline'; document.getElementById('2412.10718v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10718v4-abstract-full" style="display: none;"> Visual generation has witnessed remarkable progress in single-image tasks, yet extending these capabilities to temporal sequences remains challenging. Current approaches either build specialized video models from scratch with enormous computational costs or add separate motion modules to image generators, both requiring learning temporal dynamics anew. We observe that modern image generation models possess underutilized potential in handling structured layouts with implicit temporal understanding. Building on this insight, we introduce GRID, which reformulates temporal sequences as grid layouts, enabling holistic processing of visual sequences while leveraging existing model capabilities. Through a parallel flow-matching training strategy with coarse-to-fine scheduling, our approach achieves up to 67 faster inference speeds while using <1/1000 of the computational resources compared to specialized models. Extensive experiments demonstrate that GRID not only excels in temporal tasks from Text-to-Video to 3D Editing but also preserves strong performance in image generation, establishing itself as an efficient and versatile omni-solution for visual generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10718v4-abstract-full').style.display = 'none'; document.getElementById('2412.10718v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Codes: https://github.com/Should-AI-Lab/GRID</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09854">arXiv:2412.09854</a> <span> [<a href="https://arxiv.org/pdf/2412.09854">pdf</a>, <a href="https://arxiv.org/ps/2412.09854">ps</a>, <a href="https://arxiv.org/format/2412.09854">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TNSRE.2023.3310883">10.1109/TNSRE.2023.3310883 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> User Identity Protection in EEG-based Brain-Computer Interfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+L">L. Meng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">X. Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">J. Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">W. Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">H. Luo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">D. Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09854v1-abstract-short" style="display: inline;"> A brain-computer interface (BCI) establishes a direct communication pathway between the brain and an external device. Electroencephalogram (EEG) is the most popular input signal in BCIs, due to its convenience and low cost. Most research on EEG-based BCIs focuses on the accurate decoding of EEG signals; however, EEG signals also contain rich private information, e.g., user identity, emotion, and s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09854v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09854v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09854v1-abstract-full" style="display: none;"> A brain-computer interface (BCI) establishes a direct communication pathway between the brain and an external device. Electroencephalogram (EEG) is the most popular input signal in BCIs, due to its convenience and low cost. Most research on EEG-based BCIs focuses on the accurate decoding of EEG signals; however, EEG signals also contain rich private information, e.g., user identity, emotion, and so on, which should be protected. This paper first exposes a serious privacy problem in EEG-based BCIs, i.e., the user identity in EEG data can be easily learned so that different sessions of EEG data from the same user can be associated together to more reliably mine private information. To address this issue, we further propose two approaches to convert the original EEG data into identity-unlearnable EEG data, i.e., removing the user identity information while maintaining the good performance on the primary BCI task. Experiments on seven EEG datasets from five different BCI paradigms showed that on average the generated identity-unlearnable EEG data can reduce the user identification accuracy from 70.01\% to at most 21.36\%, greatly facilitating user privacy protection in EEG-based BCIs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09854v1-abstract-full').style.display = 'none'; document.getElementById('2412.09854v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Trans. on Neural Systems and Rehabilitation Engineering, 31:3576-3586, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09521">arXiv:2412.09521</a> <span> [<a href="https://arxiv.org/pdf/2412.09521">pdf</a>, <a href="https://arxiv.org/format/2412.09521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Efficient and Comprehensive Feature Extraction in Large Vision-Language Model for Clinical Pathology Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengxuming Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Weihan Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+T">Tianhong Gao</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jiacong Hu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoming Luo</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Mingli Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiuming Zhang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zunlei Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09521v1-abstract-short" style="display: inline;"> Pathological diagnosis is vital for determining disease characteristics, guiding treatment, and assessing prognosis, relying heavily on detailed, multi-scale analysis of high-resolution whole slide images (WSI). However, traditional pure vision models face challenges of redundant feature extraction, whereas existing large vision-language models (LVLMs) are limited by input resolution constraints,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09521v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09521v1-abstract-full" style="display: none;"> Pathological diagnosis is vital for determining disease characteristics, guiding treatment, and assessing prognosis, relying heavily on detailed, multi-scale analysis of high-resolution whole slide images (WSI). However, traditional pure vision models face challenges of redundant feature extraction, whereas existing large vision-language models (LVLMs) are limited by input resolution constraints, hindering their efficiency and accuracy. To overcome these issues, we propose two innovative strategies: the mixed task-guided feature enhancement, which directs feature extraction toward lesion-related details across scales, and the prompt-guided detail feature completion, which integrates coarse- and fine-grained features from WSI based on specific prompts without compromising inference speed. Leveraging a comprehensive dataset of 490,000 samples from diverse pathology tasks-including cancer detection, grading, vascular and neural invasion identification, and so on-we trained the pathology-specialized LVLM, OmniPath. Extensive experiments demonstrate that this model significantly outperforms existing methods in diagnostic accuracy and efficiency, offering an interactive, clinically aligned approach for auxiliary diagnosis in a wide range of pathology applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09521v1-abstract-full').style.display = 'none'; document.getElementById('2412.09521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08394">arXiv:2412.08394</a> <span> [<a href="https://arxiv.org/pdf/2412.08394">pdf</a>, <a href="https://arxiv.org/format/2412.08394">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Purification by Consistency-aware Latent Space Optimization on Data Manifolds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuhai Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiahao Yang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hui Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jie Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Li Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+B">Bo Han</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+M">Mingkui Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.08394v1-abstract-short" style="display: inline;"> Deep neural networks (DNNs) are vulnerable to adversarial samples crafted by adding imperceptible perturbations to clean data, potentially leading to incorrect and dangerous predictions. Adversarial purification has been an effective means to improve DNNs robustness by removing these perturbations before feeding the data into the model. However, it faces significant challenges in preserving key st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08394v1-abstract-full').style.display = 'inline'; document.getElementById('2412.08394v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08394v1-abstract-full" style="display: none;"> Deep neural networks (DNNs) are vulnerable to adversarial samples crafted by adding imperceptible perturbations to clean data, potentially leading to incorrect and dangerous predictions. Adversarial purification has been an effective means to improve DNNs robustness by removing these perturbations before feeding the data into the model. However, it faces significant challenges in preserving key structural and semantic information of data, as the imperceptible nature of adversarial perturbations makes it hard to avoid over-correcting, which can destroy important information and degrade model performance. In this paper, we break away from traditional adversarial purification methods by focusing on the clean data manifold. To this end, we reveal that samples generated by a well-trained generative model are close to clean ones but far from adversarial ones. Leveraging this insight, we propose Consistency Model-based Adversarial Purification (CMAP), which optimizes vectors within the latent space of a pre-trained consistency model to generate samples for restoring clean data. Specifically, 1) we propose a \textit{Perceptual consistency restoration} mechanism by minimizing the discrepancy between generated samples and input samples in both pixel and perceptual spaces. 2) To maintain the optimized latent vectors within the valid data manifold, we introduce a \textit{Latent distribution consistency constraint} strategy to align generated samples with the clean data distribution. 3) We also apply a \textit{Latent vector consistency prediction} scheme via an ensemble approach to enhance prediction reliability. CMAP fundamentally addresses adversarial perturbations at their source, providing a robust purification. Extensive experiments on CIFAR-10 and ImageNet-100 show that our CMAP significantly enhances robustness against strong adversarial attacks while preserving high natural accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08394v1-abstract-full').style.display = 'none'; document.getElementById('2412.08394v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07231">arXiv:2412.07231</a> <span> [<a href="https://arxiv.org/pdf/2412.07231">pdf</a>, <a href="https://arxiv.org/ps/2412.07231">ps</a>, <a href="https://arxiv.org/format/2412.07231">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.inffus.2024.102316">10.1016/j.inffus.2024.102316 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Adversarial Filtering Based Evasion and Backdoor Attacks to EEG-Based Brain-Computer Interfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+L">Lubin Meng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xue Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaoqing Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenzhong Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hanbin Luo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Dongrui Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07231v1-abstract-short" style="display: inline;"> A brain-computer interface (BCI) enables direct communication between the brain and an external device. Electroencephalogram (EEG) is a common input signal for BCIs, due to its convenience and low cost. Most research on EEG-based BCIs focuses on the accurate decoding of EEG signals, while ignoring their security. Recent studies have shown that machine learning models in BCIs are vulnerable to adve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07231v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07231v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07231v1-abstract-full" style="display: none;"> A brain-computer interface (BCI) enables direct communication between the brain and an external device. Electroencephalogram (EEG) is a common input signal for BCIs, due to its convenience and low cost. Most research on EEG-based BCIs focuses on the accurate decoding of EEG signals, while ignoring their security. Recent studies have shown that machine learning models in BCIs are vulnerable to adversarial attacks. This paper proposes adversarial filtering based evasion and backdoor attacks to EEG-based BCIs, which are very easy to implement. Experiments on three datasets from different BCI paradigms demonstrated the effectiveness of our proposed attack approaches. To our knowledge, this is the first study on adversarial filtering for EEG-based BCIs, raising a new security concern and calling for more attention on the security of BCIs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07231v1-abstract-full').style.display = 'none'; document.getElementById('2412.07231v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> L. Meng, X. Jiang, X. Chen, W. Liu, H. Luo and D. Wu, Adversarial Filtering Based Evasion and Backdoor Attacks to EEG-Based Brain-Computer Interfaces, Information Fusion, 107:102316, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07228">arXiv:2412.07228</a> <span> [<a href="https://arxiv.org/pdf/2412.07228">pdf</a>, <a href="https://arxiv.org/ps/2412.07228">ps</a>, <a href="https://arxiv.org/format/2412.07228">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TBME.2023.3303289">10.1109/TBME.2023.3303289 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> T-TIME: Test-Time Information Maximization Ensemble for Plug-and-Play BCIs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+S">Siyang Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziwei Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hanbin Luo</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+L">Lieyun Ding</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Dongrui Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07228v1-abstract-short" style="display: inline;"> Objective: An electroencephalogram (EEG)-based brain-computer interface (BCI) enables direct communication between the human brain and a computer. Due to individual differences and non-stationarity of EEG signals, such BCIs usually require a subject-specific calibration session before each use, which is time-consuming and user-unfriendly. Transfer learning (TL) has been proposed to shorten or elim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07228v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07228v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07228v1-abstract-full" style="display: none;"> Objective: An electroencephalogram (EEG)-based brain-computer interface (BCI) enables direct communication between the human brain and a computer. Due to individual differences and non-stationarity of EEG signals, such BCIs usually require a subject-specific calibration session before each use, which is time-consuming and user-unfriendly. Transfer learning (TL) has been proposed to shorten or eliminate this calibration, but existing TL approaches mainly consider offline settings, where all unlabeled EEG trials from the new user are available. Methods: This paper proposes Test-Time Information Maximization Ensemble (T-TIME) to accommodate the most challenging online TL scenario, where unlabeled EEG data from the new user arrive in a stream, and immediate classification is performed. T-TIME initializes multiple classifiers from the aligned source data. When an unlabeled test EEG trial arrives, T-TIME first predicts its labels using ensemble learning, and then updates each classifier by conditional entropy minimization and adaptive marginal distribution regularization. Our code is publicized. Results: Extensive experiments on three public motor imagery based BCI datasets demonstrated that T-TIME outperformed about 20 classical and state-of-the-art TL approaches. Significance: To our knowledge, this is the first work on test time adaptation for calibration-free EEG-based BCIs, making plug-and-play BCIs possible. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07228v1-abstract-full').style.display = 'none'; document.getElementById('2412.07228v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> S. Li, Z. Wang, H. Luo, L. Ding and D. Wu, T-TIME: Test-Time Information Maximization Ensemble for Plug-and-Play BCIs, IEEE Trans. on Biomedical Engineering, 71(2):423-432, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07120">arXiv:2412.07120</a> <span> [<a href="https://arxiv.org/pdf/2412.07120">pdf</a>, <a href="https://arxiv.org/ps/2412.07120">ps</a>, <a href="https://arxiv.org/format/2412.07120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Corrupted Learning Dynamics in Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tsuchiya%2C+T">Taira Tsuchiya</a>, <a href="/search/cs?searchtype=author&query=Ito%2C+S">Shinji Ito</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07120v1-abstract-short" style="display: inline;"> Learning in games is the problem where multiple players interact in a shared environment, each aiming to minimize their own regret, and it is known that an approximate equilibrium can be obtained when all players employ no-regret algorithms. Notably, by adopting optimistic follow-the-regularized-leader (OFTRL), the regret of each player after $T$ rounds is constant in two-player zero-sum games, im… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07120v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07120v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07120v1-abstract-full" style="display: none;"> Learning in games is the problem where multiple players interact in a shared environment, each aiming to minimize their own regret, and it is known that an approximate equilibrium can be obtained when all players employ no-regret algorithms. Notably, by adopting optimistic follow-the-regularized-leader (OFTRL), the regret of each player after $T$ rounds is constant in two-player zero-sum games, implying that an equilibrium can be computed at a faster rate of $O(1/T)$. However, this acceleration is limited to the honest regime, where all players fully adhere to the given algorithms. To address this limitation, this paper presents corrupted learning dynamics that adaptively find an equilibrium at a rate dependent on the degree of deviation by each player from the given algorithm's output. First, in two-player zero-sum games, we provide learning dynamics where the external regret of the x-player (and similarly for the y-player) in the corrupted regime is roughly bounded by $O(\log (m_\mathrm{x} m_\mathrm{y}) + \sqrt{C_\mathrm{y}} + C_\mathrm{x})$, which implies a convergence rate of $\tilde{O}((\sqrt{C_\mathrm{y}} + C_\mathrm{x})/T)$ to a Nash equilibrium. Here, $m_\mathrm{x}$ and $m_\mathrm{y}$ are the number of actions of the x- and y-players, respectively, and $C_\mathrm{x}$ and $C_\mathrm{y}$ are the cumulative deviations of the x- and y-players from their given algorithms. Furthermore, we extend our approach to multi-player general-sum games, showing that the swap regret of player $i$ in the corrupted regime is bounded by $O(\log T + \sqrt{\sum_j C_j \log T} + C_i)$, where $C_i$ is the cumulative deviations of player $i$ from the given algorithm. This implies a convergence rate of $O((\log T + \sqrt{\sum_j C_j \log T} + C_i)/T)$ to a correlated equilibrium. Our learning dynamics are agnostic to the corruption levels and are based on OFTRL with new adaptive learning rates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07120v1-abstract-full').style.display = 'none'; document.getElementById('2412.07120v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.04742">arXiv:2412.04742</a> <span> [<a href="https://arxiv.org/pdf/2412.04742">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> DRDST: Low-latency DAG Consensus through Robust Dynamic Sharding and Tree-broadcasting for IoV </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+R">Runhua Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoxiang Luo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Gang Sun</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongfang Yu</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Dustdar%2C+S">Schahram Dustdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.04742v1-abstract-short" style="display: inline;"> The Internet of Vehicles (IoV) is emerging as a pivotal technology for enhancing traffic management and safety. Its rapid development demands solutions for enhanced communication efficiency and reduced latency. However, traditional centralized networks struggle to meet these demands, prompting the exploration of decentralized solutions such as blockchain. Addressing blockchain's scalability challe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04742v1-abstract-full').style.display = 'inline'; document.getElementById('2412.04742v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.04742v1-abstract-full" style="display: none;"> The Internet of Vehicles (IoV) is emerging as a pivotal technology for enhancing traffic management and safety. Its rapid development demands solutions for enhanced communication efficiency and reduced latency. However, traditional centralized networks struggle to meet these demands, prompting the exploration of decentralized solutions such as blockchain. Addressing blockchain's scalability challenges posed by the growing number of nodes and transactions calls for innovative solutions, among which sharding stands out as a pivotal approach to significantly enhance blockchain throughput. However, existing schemes still face challenges related to a) the impact of vehicle mobility on blockchain consensus, especially for cross-shard transaction; and b) the strict requirements of low latency consensus in a highly dynamic network. In this paper, we propose a DAG (Directed Acyclic Graph) consensus leveraging Robust Dynamic Sharding and Tree-broadcasting (DRDST) to address these challenges. Specifically, we first develop a standard for evaluating the network stability of nodes, combined with the nodes' trust values, to propose a novel robust sharding model that is solved through the design of the Genetic Sharding Algorithm (GSA). Then, we optimize the broadcast latency of the whole sharded network by improving the tree-broadcasting to minimize the maximum broadcast latency within each shard. On this basis, we also design a DAG consensus scheme based on an improved hashgraph protocol, which can efficiently handle cross-shard transactions. Finally, the simulation proves the proposed scheme is superior to the comparison schemes in latency, throughput, consensus success rate, and node traffic load. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04742v1-abstract-full').style.display = 'none'; document.getElementById('2412.04742v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01550">arXiv:2412.01550</a> <span> [<a href="https://arxiv.org/pdf/2412.01550">pdf</a>, <a href="https://arxiv.org/format/2412.01550">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chunlin Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hanqing Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Ye Shi</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoyang Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sibei Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jingyi Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingya Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01550v1-abstract-short" style="display: inline;"> 3D affordance segmentation aims to link human instructions to touchable regions of 3D objects for embodied manipulations. Existing efforts typically adhere to single-object, single-affordance paradigms, where each affordance type or explicit instruction strictly corresponds to a specific affordance region and are unable to handle long-horizon tasks. Such a paradigm cannot actively reason about com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01550v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01550v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01550v1-abstract-full" style="display: none;"> 3D affordance segmentation aims to link human instructions to touchable regions of 3D objects for embodied manipulations. Existing efforts typically adhere to single-object, single-affordance paradigms, where each affordance type or explicit instruction strictly corresponds to a specific affordance region and are unable to handle long-horizon tasks. Such a paradigm cannot actively reason about complex user intentions that often imply sequential affordances. In this paper, we introduce the Sequential 3D Affordance Reasoning task, which extends the traditional paradigm by reasoning from cumbersome user intentions and then decomposing them into a series of segmentation maps. Toward this, we construct the first instruction-based affordance segmentation benchmark that includes reasoning over both single and sequential affordances, comprising 180K instruction-point cloud pairs. Based on the benchmark, we propose our model, SeqAfford, to unlock the 3D multi-modal large language model with additional affordance segmentation abilities, which ensures reasoning with world knowledge and fine-grained affordance grounding in a cohesive framework. We further introduce a multi-granular language-point integration module to endow 3D dense prediction. Extensive experimental evaluations show that our model excels over well-established methods and exhibits open-world generalization with sequential reasoning abilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01550v1-abstract-full').style.display = 'none'; document.getElementById('2412.01550v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00857">arXiv:2412.00857</a> <span> [<a href="https://arxiv.org/pdf/2412.00857">pdf</a>, <a href="https://arxiv.org/format/2412.00857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Advanced Video Inpainting Using Optical Flow-Guided Efficient Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gu%2C+B">Bohai Gu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Song Guo</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+P">Peiran Dong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00857v2-abstract-short" style="display: inline;"> Recently, diffusion-based methods have achieved great improvements in the video inpainting task. However, these methods still face many challenges, such as maintaining temporal consistency and the time-consuming issue. This paper proposes an advanced video inpainting framework using optical Flow-guided Efficient Diffusion, called FloED. Specifically, FloED employs a dual-branch architecture, where… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00857v2-abstract-full').style.display = 'inline'; document.getElementById('2412.00857v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00857v2-abstract-full" style="display: none;"> Recently, diffusion-based methods have achieved great improvements in the video inpainting task. However, these methods still face many challenges, such as maintaining temporal consistency and the time-consuming issue. This paper proposes an advanced video inpainting framework using optical Flow-guided Efficient Diffusion, called FloED. Specifically, FloED employs a dual-branch architecture, where a flow branch first restores corrupted flow and a multi-scale flow adapter provides motion guidance to the main inpainting branch. Additionally, a training-free latent interpolation method is proposed to accelerate the multi-step denoising process using flow warping. Further introducing a flow attention cache mechanism, FLoED efficiently reduces the computational cost brought by incorporating optical flow. Comprehensive experiments in both background restoration and object removal tasks demonstrate that FloED outperforms state-of-the-art methods from the perspective of both performance and efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00857v2-abstract-full').style.display = 'none'; document.getElementById('2412.00857v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://nevsnev.github.io/FloED/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00724">arXiv:2412.00724</a> <span> [<a href="https://arxiv.org/pdf/2412.00724">pdf</a>, <a href="https://arxiv.org/format/2412.00724">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AdaScale: Dynamic Context-aware DNN Scaling via Automated Adaptation Loop on Mobile Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuzhan Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sicong Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+B">Bin Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Boqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+K">Ke Ma</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yasan Ding</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yao Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhiwen Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00724v1-abstract-short" style="display: inline;"> Deep learning is reshaping mobile applications, with a growing trend of deploying deep neural networks (DNNs) directly to mobile and embedded devices to address real-time performance and privacy. To accommodate local resource limitations, techniques like weight compression, convolution decomposition, and specialized layer architectures have been developed. However, the \textit{dynamic} and \textit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00724v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00724v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00724v1-abstract-full" style="display: none;"> Deep learning is reshaping mobile applications, with a growing trend of deploying deep neural networks (DNNs) directly to mobile and embedded devices to address real-time performance and privacy. To accommodate local resource limitations, techniques like weight compression, convolution decomposition, and specialized layer architectures have been developed. However, the \textit{dynamic} and \textit{diverse} deployment contexts of mobile devices pose significant challenges. Adapting deep models to meet varied device-specific requirements for latency, accuracy, memory, and energy is labor-intensive. Additionally, changing processor states, fluctuating memory availability, and competing processes frequently necessitate model re-compression to preserve user experience. To address these issues, we introduce AdaScale, an elastic inference framework that automates the adaptation of deep models to dynamic contexts. AdaScale leverages a self-evolutionary model to streamline network creation, employs diverse compression operator combinations to reduce the search space and improve outcomes, and integrates a resource availability awareness block and performance profilers to establish an automated adaptation loop. Our experiments demonstrate that AdaScale significantly enhances accuracy by 5.09%, reduces training overhead by 66.89%, speeds up inference latency by 1.51 to 6.2 times, and lowers energy costs by 4.69 times. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00724v1-abstract-full').style.display = 'none'; document.getElementById('2412.00724v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00324">arXiv:2412.00324</a> <span> [<a href="https://arxiv.org/pdf/2412.00324">pdf</a>, <a href="https://arxiv.org/format/2412.00324">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Robust Table Integration in Data Lakes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+D">Daomin Ji</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hui Luo</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+Z">Zhifeng Bao</a>, <a href="/search/cs?searchtype=author&query=Culpepper%2C+S">Shane Culpepper</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00324v1-abstract-short" style="display: inline;"> In this paper, we investigate the challenge of integrating tables from data lakes, focusing on three core tasks: 1) pairwise integrability judgment, which determines whether a tuple pair in a table is integrable, accounting for any occurrences of semantic equivalence or typographical errors; 2) integrable set discovery, which aims to identify all integrable sets in a table based on pairwise integr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00324v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00324v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00324v1-abstract-full" style="display: none;"> In this paper, we investigate the challenge of integrating tables from data lakes, focusing on three core tasks: 1) pairwise integrability judgment, which determines whether a tuple pair in a table is integrable, accounting for any occurrences of semantic equivalence or typographical errors; 2) integrable set discovery, which aims to identify all integrable sets in a table based on pairwise integrability judgments established in the first task; 3) multi-tuple conflict resolution, which resolves conflicts among multiple tuples during integration. We train a binary classifier to address the task of pairwise integrability judgment. Given the scarcity of labeled data, we propose a self-supervised adversarial contrastive learning algorithm to perform classification, which incorporates data augmentation methods and adversarial examples to autonomously generate new training data. Upon the output of pairwise integrability judgment, each integrable set is considered as a community, a densely connected sub-graph where nodes and edges correspond to tuples in the table and their pairwise integrability, respectively. We proceed to investigate various community detection algorithms to address the integrable set discovery objective. Moving forward to tackle multi-tuple conflict resolution, we introduce an novel in-context learning methodology. This approach capitalizes on the knowledge embedded within pretrained large language models to effectively resolve conflicts that arise when integrating multiple tuples. Notably, our method minimizes the need for annotated data. Since no suitable test collections are available for our tasks, we develop our own benchmarks using two real-word dataset repositories: Real and Join. We conduct extensive experiments on these benchmarks to validate the robustness and applicability of our methodologies in the context of integrating tables within data lakes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00324v1-abstract-full').style.display = 'none'; document.getElementById('2412.00324v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.19626">arXiv:2411.19626</a> <span> [<a href="https://arxiv.org/pdf/2411.19626">pdf</a>, <a href="https://arxiv.org/format/2411.19626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yawen Shao</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+W">Wei Zhai</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuhang Yang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongchen Luo</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+Z">Zheng-Jun Zha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.19626v1-abstract-short" style="display: inline;"> Open-Vocabulary 3D object affordance grounding aims to anticipate ``action possibilities'' regions on 3D objects with arbitrary instructions, which is crucial for robots to generically perceive real scenarios and respond to operational changes. Existing methods focus on combining images or languages that depict interactions with 3D geometries to introduce external interaction priors. However, they… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19626v1-abstract-full').style.display = 'inline'; document.getElementById('2411.19626v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.19626v1-abstract-full" style="display: none;"> Open-Vocabulary 3D object affordance grounding aims to anticipate ``action possibilities'' regions on 3D objects with arbitrary instructions, which is crucial for robots to generically perceive real scenarios and respond to operational changes. Existing methods focus on combining images or languages that depict interactions with 3D geometries to introduce external interaction priors. However, they are still vulnerable to a limited semantic space by failing to leverage implied invariant geometries and potential interaction intentions. Normally, humans address complex tasks through multi-step reasoning and respond to diverse situations by leveraging associative and analogical thinking. In light of this, we propose GREAT (GeometRy-intEntion collAboraTive inference) for Open-Vocabulary 3D Object Affordance Grounding, a novel framework that mines the object invariant geometry attributes and performs analogically reason in potential interaction scenarios to form affordance knowledge, fully combining the knowledge with both geometries and visual contents to ground 3D object affordance. Besides, we introduce the Point Image Affordance Dataset v2 (PIADv2), the largest 3D object affordance dataset at present to support the task. Extensive experiments demonstrate the effectiveness and superiority of GREAT. Code and dataset are available at project. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19626v1-abstract-full').style.display = 'none'; document.getElementById('2411.19626v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17558">arXiv:2411.17558</a> <span> [<a href="https://arxiv.org/pdf/2411.17558">pdf</a>, <a href="https://arxiv.org/format/2411.17558">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Natural Language Understanding and Inference with MLLM in Visual Question Answering: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kuang%2C+J">Jiayi Kuang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jingyou Xie</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haohao Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ronghao Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhe Xu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xianfeng Cheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yinghui Li</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xika Lin</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Ying Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17558v1-abstract-short" style="display: inline;"> Visual Question Answering (VQA) is a challenge task that combines natural language processing and computer vision techniques and gradually becomes a benchmark test task in multimodal large language models (MLLMs). The goal of our survey is to provide an overview of the development of VQA and a detailed description of the latest models with high timeliness. This survey gives an up-to-date synthesis… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17558v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17558v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17558v1-abstract-full" style="display: none;"> Visual Question Answering (VQA) is a challenge task that combines natural language processing and computer vision techniques and gradually becomes a benchmark test task in multimodal large language models (MLLMs). The goal of our survey is to provide an overview of the development of VQA and a detailed description of the latest models with high timeliness. This survey gives an up-to-date synthesis of natural language understanding of images and text, as well as the knowledge reasoning module based on image-question information on the core VQA tasks. In addition, we elaborate on recent advances in extracting and fusing modal information with vision-language pretraining models and multimodal large language models in VQA. We also exhaustively review the progress of knowledge reasoning in VQA by detailing the extraction of internal knowledge and the introduction of external knowledge. Finally, we present the datasets of VQA and different evaluation metrics and discuss possible directions for future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17558v1-abstract-full').style.display = 'none'; document.getElementById('2411.17558v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16082">arXiv:2411.16082</a> <span> [<a href="https://arxiv.org/pdf/2411.16082">pdf</a>, <a href="https://arxiv.org/format/2411.16082">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Leverage Task Context for Object Affordance Ranking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haojie Huang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongchen Luo</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+W">Wei Zhai</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+Z">Zheng-Jun Zha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16082v1-abstract-short" style="display: inline;"> Intelligent agents accomplish different tasks by utilizing various objects based on their affordance, but how to select appropriate objects according to task context is not well-explored. Current studies treat objects within the affordance category as equivalent, ignoring that object affordances vary in priority with different task contexts, hindering accurate decision-making in complex environmen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16082v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16082v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16082v1-abstract-full" style="display: none;"> Intelligent agents accomplish different tasks by utilizing various objects based on their affordance, but how to select appropriate objects according to task context is not well-explored. Current studies treat objects within the affordance category as equivalent, ignoring that object affordances vary in priority with different task contexts, hindering accurate decision-making in complex environments. To enable agents to develop a deeper understanding of the objects required to perform tasks, we propose to leverage task context for object affordance ranking, i.e., given image of a complex scene and the textual description of the affordance and task context, revealing task-object relationships and clarifying the priority rank of detected objects. To this end, we propose a novel Context-embed Group Ranking Framework with task relation mining module and graph group update module to deeply integrate task context and perform global relative relationship transmission. Due to the lack of such data, we construct the first large-scale task-oriented affordance ranking dataset with 25 common tasks, over 50k images and more than 661k objects. Experimental results demonstrate the feasibility of the task context based affordance learning paradigm and the superiority of our model over state-of-the-art models in the fields of saliency ranking and multimodal object detection. The source code and dataset will be made available to the public. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16082v1-abstract-full').style.display = 'none'; document.getElementById('2411.16082v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15185">arXiv:2411.15185</a> <span> [<a href="https://arxiv.org/pdf/2411.15185">pdf</a>, <a href="https://arxiv.org/format/2411.15185">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Gaussian Process Regression with Temporal Feature Extraction for Partially Interpretable Remaining Useful Life Interval Prediction in Aeroengine Prognostics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Niu%2C+T">Tian Niu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zijun Xu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Heng Luo</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Ziqing Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15185v1-abstract-short" style="display: inline;"> The estimation of Remaining Useful Life (RUL) plays a pivotal role in intelligent manufacturing systems and Industry 4.0 technologies. While recent advancements have improved RUL prediction, many models still face interpretability and compelling uncertainty modeling challenges. This paper introduces a modified Gaussian Process Regression (GPR) model for RUL interval prediction, tailored for the co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15185v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15185v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15185v1-abstract-full" style="display: none;"> The estimation of Remaining Useful Life (RUL) plays a pivotal role in intelligent manufacturing systems and Industry 4.0 technologies. While recent advancements have improved RUL prediction, many models still face interpretability and compelling uncertainty modeling challenges. This paper introduces a modified Gaussian Process Regression (GPR) model for RUL interval prediction, tailored for the complexities of manufacturing process development. The modified GPR predicts confidence intervals by learning from historical data and addresses uncertainty modeling in a more structured way. The approach effectively captures intricate time-series patterns and dynamic behaviors inherent in modern manufacturing systems by coupling GPR with deep adaptive learning-enhanced AI process models. Moreover, the model evaluates feature significance to ensure more transparent decision-making, which is crucial for optimizing manufacturing processes. This comprehensive approach supports more accurate RUL predictions and provides transparent, interpretable insights into uncertainty, contributing to robust process development and management. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15185v1-abstract-full').style.display = 'none'; document.getElementById('2411.15185v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12177">arXiv:2411.12177</a> <span> [<a href="https://arxiv.org/pdf/2411.12177">pdf</a>, <a href="https://arxiv.org/format/2411.12177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robust 3D Semantic Occupancy Prediction with Calibration-free Spatial Transformation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhuang%2C+Z">Zhuangwei Zhuang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziyin Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sitao Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Lizhao Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hui Luo</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+M">Mingkui Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12177v1-abstract-short" style="display: inline;"> 3D semantic occupancy prediction, which seeks to provide accurate and comprehensive representations of environment scenes, is important to autonomous driving systems. For autonomous cars equipped with multi-camera and LiDAR, it is critical to aggregate multi-sensor information into a unified 3D space for accurate and robust predictions. Recent methods are mainly built on the 2D-to-3D transformatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12177v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12177v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12177v1-abstract-full" style="display: none;"> 3D semantic occupancy prediction, which seeks to provide accurate and comprehensive representations of environment scenes, is important to autonomous driving systems. For autonomous cars equipped with multi-camera and LiDAR, it is critical to aggregate multi-sensor information into a unified 3D space for accurate and robust predictions. Recent methods are mainly built on the 2D-to-3D transformation that relies on sensor calibration to project the 2D image information into the 3D space. These methods, however, suffer from two major limitations: First, they rely on accurate sensor calibration and are sensitive to the calibration noise, which limits their application in real complex environments. Second, the spatial transformation layers are computationally expensive and limit their running on an autonomous vehicle. In this work, we attempt to exploit a Robust and Efficient 3D semantic Occupancy (REO) prediction scheme. To this end, we propose a calibration-free spatial transformation based on vanilla attention to implicitly model the spatial correspondence. In this way, we robustly project the 2D features to a predefined BEV plane without using sensor calibration as input. Then, we introduce 2D and 3D auxiliary training tasks to enhance the discrimination power of 2D backbones on spatial, semantic, and texture features. Last, we propose a query-based prediction scheme to efficiently generate large-scale fine-grained occupancy predictions. By fusing point clouds that provide complementary spatial information, our REO surpasses the existing methods by a large margin on three benchmarks, including OpenOccupancy, Occ3D-nuScenes, and SemanticKITTI Scene Completion. For instance, our REO achieves 19.8$\times$ speedup compared to Co-Occ, with 1.1 improvements in geometry IoU on OpenOccupancy. Our code will be available at https://github.com/ICEORY/REO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12177v1-abstract-full').style.display = 'none'; document.getElementById('2411.12177v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 11 figures, 18 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10926">arXiv:2411.10926</a> <span> [<a href="https://arxiv.org/pdf/2411.10926">pdf</a>, <a href="https://arxiv.org/format/2411.10926">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TNSE.2024.3498042">10.1109/TNSE.2024.3498042 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Link-identified Routing Architecture in Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hefan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhiyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shan Zhang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+Q">Qingkai Meng</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongbin Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10926v1-abstract-short" style="display: inline;"> Low earth orbit (LEO) satellite networks have the potential to provide low-latency communication with global coverage. To unleash this potential, it is crucial to achieve efficient packet delivery. In this paper, we propose a Link-identified Routing (LiR) architecture for LEO satellite networks. The LiR architecture leverages the deterministic neighbor relation of LEO constellations, and identifie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10926v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10926v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10926v1-abstract-full" style="display: none;"> Low earth orbit (LEO) satellite networks have the potential to provide low-latency communication with global coverage. To unleash this potential, it is crucial to achieve efficient packet delivery. In this paper, we propose a Link-identified Routing (LiR) architecture for LEO satellite networks. The LiR architecture leverages the deterministic neighbor relation of LEO constellations, and identifies each inter-satellite link (ISL). Moreover, LiR architecture adopts source-route-style forwarding based on in-packet bloom filter (BF). Each satellite could efficiently encode multiple ISL identifiers via an in-packet BF to specify the end-to-end path for the packets. Due to false positives caused by BF, the more ISLs are encoded at a time, the more redundant forwarding cases emerge. Based on the topology characteristics, we derive the expected forwarding overhead in a closed-form and propose the optimal encoding policy. To accommodate link-state changes in LEO satellite networks, we propose the on-demand rerouting scheme and the on-demand detouring scheme to address the intermittent ISLs. We also elaborate how to take advantage of LiR architecture to achieve seamless handover for ground-satellite links (GSLs). Finally, we conduct extensive numerical experiments and packet-level simulations to verify our analytical results and evaluate the performance of the LiR architecture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10926v1-abstract-full').style.display = 'none'; document.getElementById('2411.10926v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Network Science and Engineering, 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08911">arXiv:2411.08911</a> <span> [<a href="https://arxiv.org/pdf/2411.08911">pdf</a>, <a href="https://arxiv.org/format/2411.08911">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Message Passing Neural Network Surrogate Model for Bond-Associated Peridynamic Material Correspondence Formulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xuan Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qijun Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+N+H">Nicholas H. Luo</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+R+J">Richy J. Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shaofan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08911v1-abstract-short" style="display: inline;"> Peridynamics is a non-local continuum mechanics theory that offers unique advantages for modeling problems involving discontinuities and complex deformations. Within the peridynamic framework, various formulations exist, among which the material correspondence formulation stands out for its ability to directly incorporate traditional continuum material models, making it highly applicable to a rang… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08911v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08911v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08911v1-abstract-full" style="display: none;"> Peridynamics is a non-local continuum mechanics theory that offers unique advantages for modeling problems involving discontinuities and complex deformations. Within the peridynamic framework, various formulations exist, among which the material correspondence formulation stands out for its ability to directly incorporate traditional continuum material models, making it highly applicable to a range of engineering challenges. A notable advancement in this area is the bond-associated correspondence model, which not only resolves issues of material instability but also achieves high computational accuracy. However, the bond-associated model typically requires higher computational costs than FEA, which can limit its practical application. To address this computational challenge, we propose a novel surrogate model based on a message-passing neural network (MPNN) specifically designed for the bond-associated peridynamic material correspondence formulation. Leveraging the similarities between graph structure and the neighborhood connectivity inherent to peridynamics, we construct an MPNN that can transfers domain knowledge from peridynamics into a computational graph and shorten the computation time via GPU acceleration. Unlike conventional graph neural networks that focus on node features, our model emphasizes edge-based features, capturing the essential material point interactions in the formulation. A key advantage of this neural network approach is its flexibility: it does not require fixed neighborhood connectivity, making it adaptable across diverse configurations and scalable for complex systems. Furthermore, the model inherently possesses translational and rotational invariance, enabling it to maintain physical objectivity: a critical requirement for accurate mechanical modeling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08911v1-abstract-full').style.display = 'none'; document.getElementById('2411.08911v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2410.00934</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08703">arXiv:2411.08703</a> <span> [<a href="https://arxiv.org/pdf/2411.08703">pdf</a>, <a href="https://arxiv.org/format/2411.08703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MVKTrans: Multi-View Knowledge Transfer for Robust Multiomics Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cong%2C+S">Shan Cong</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+Z">Zhiling Sang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongwei Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Hong Liang</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jie Hao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaohui Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08703v1-abstract-short" style="display: inline;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08703v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08703v1-abstract-full" style="display: none;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra- and inter-omics knowledge in an adaptive manner by reviewing data heterogeneity and suppressing bias transfer, thereby enhancing classification performance. Specifically, we design a graph contrastive module that is trained on unlabeled data to effectively learn and transfer the underlying intra-omics patterns to the supervised task. This unsupervised pretraining promotes learning general and unbiased representations for each modality, regardless of the downstream tasks. In light of the varying discriminative capacities of modalities across different diseases and/or samples, we introduce an adaptive and bi-directional cross-omics distillation module. This module automatically identifies richer modalities and facilitates dynamic knowledge transfer from more informative to less informative omics, thereby enabling a more robust and generalized integration. Extensive experiments on four real biomedical datasets demonstrate the superior performance and robustness of MVKTrans compared to the state-of-the-art. Code and data are available at https://github.com/Yaolab-fantastic/MVKTrans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'none'; document.getElementById('2411.08703v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06405">arXiv:2411.06405</a> <span> [<a href="https://arxiv.org/pdf/2411.06405">pdf</a>, <a href="https://arxiv.org/format/2411.06405">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Parallel Higher-order Truss Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+J">Jingya Qian</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hui Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yongye Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoyang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06405v1-abstract-short" style="display: inline;"> The k-truss model is one of the most important models in cohesive subgraph analysis. The k-truss decomposition problem is to compute the trussness of each edge in a given graph, and has been extensively studied. However, the conventional k-truss model is difficult to characterize the fine-grained hierarchical structures in networks due to the neglect of high order information. To overcome the limi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06405v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06405v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06405v1-abstract-full" style="display: none;"> The k-truss model is one of the most important models in cohesive subgraph analysis. The k-truss decomposition problem is to compute the trussness of each edge in a given graph, and has been extensively studied. However, the conventional k-truss model is difficult to characterize the fine-grained hierarchical structures in networks due to the neglect of high order information. To overcome the limitation, the higher-order truss model is proposed in the literature. However, the previous solutions only consider non-parallel scenarios. To fill the gap, in this paper, we conduct the first research to study the problem of parallel higher-order truss decomposition. Specifically, a parallel framework is first proposed. Moreover, several optimizations are further developed to accelerate the processing. Finally, experiments over 6 real-world networks are conducted to verify the performance of proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06405v1-abstract-full').style.display = 'none'; document.getElementById('2411.06405v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05869">arXiv:2411.05869</a> <span> [<a href="https://arxiv.org/pdf/2411.05869">pdf</a>, <a href="https://arxiv.org/format/2411.05869">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Compactly-supported nonstationary kernels for computing exact Gaussian processes on big data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Risser%2C+M+D">Mark D. Risser</a>, <a href="/search/cs?searchtype=author&query=Noack%2C+M+M">Marcus M. Noack</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hengrui Luo</a>, <a href="/search/cs?searchtype=author&query=Pandolfi%2C+R">Ronald Pandolfi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05869v1-abstract-short" style="display: inline;"> The Gaussian process (GP) is a widely used probabilistic machine learning method for stochastic function approximation, stochastic modeling, and analyzing real-world measurements of nonlinear processes. Unlike many other machine learning methods, GPs include an implicit characterization of uncertainty, making them extremely useful across many areas of science, technology, and engineering. Traditio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05869v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05869v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05869v1-abstract-full" style="display: none;"> The Gaussian process (GP) is a widely used probabilistic machine learning method for stochastic function approximation, stochastic modeling, and analyzing real-world measurements of nonlinear processes. Unlike many other machine learning methods, GPs include an implicit characterization of uncertainty, making them extremely useful across many areas of science, technology, and engineering. Traditional implementations of GPs involve stationary kernels (also termed covariance functions) that limit their flexibility and exact methods for inference that prevent application to data sets with more than about ten thousand points. Modern approaches to address stationarity assumptions generally fail to accommodate large data sets, while all attempts to address scalability focus on approximating the Gaussian likelihood, which can involve subjectivity and lead to inaccuracies. In this work, we explicitly derive an alternative kernel that can discover and encode both sparsity and nonstationarity. We embed the kernel within a fully Bayesian GP model and leverage high-performance computing resources to enable the analysis of massive data sets. We demonstrate the favorable performance of our novel kernel relative to existing exact and approximate GP methods across a variety of synthetic data examples. Furthermore, we conduct space-time prediction based on more than one million measurements of daily maximum temperature and verify that our results outperform state-of-the-art methods in the Earth sciences. More broadly, having access to exact GPs that use ultra-scalable, sparsity-discovering, nonstationary kernels allows GP methods to truly compete with a wide variety of machine learning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05869v1-abstract-full').style.display = 'none'; document.getElementById('2411.05869v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03169">arXiv:2411.03169</a> <span> [<a href="https://arxiv.org/pdf/2411.03169">pdf</a>, <a href="https://arxiv.org/format/2411.03169">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pre-trained Visual Dynamics Representations for Efficient Policy Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+B">Bohan Zhou</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zongqing Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03169v1-abstract-short" style="display: inline;"> Pre-training for Reinforcement Learning (RL) with purely video data is a valuable yet challenging problem. Although in-the-wild videos are readily available and inhere a vast amount of prior world knowledge, the absence of action annotations and the common domain gap with downstream tasks hinder utilizing videos for RL pre-training. To address the challenge of pre-training with videos, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03169v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03169v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03169v1-abstract-full" style="display: none;"> Pre-training for Reinforcement Learning (RL) with purely video data is a valuable yet challenging problem. Although in-the-wild videos are readily available and inhere a vast amount of prior world knowledge, the absence of action annotations and the common domain gap with downstream tasks hinder utilizing videos for RL pre-training. To address the challenge of pre-training with videos, we propose Pre-trained Visual Dynamics Representations (PVDR) to bridge the domain gap between videos and downstream tasks for efficient policy learning. By adopting video prediction as a pre-training task, we use a Transformer-based Conditional Variational Autoencoder (CVAE) to learn visual dynamics representations. The pre-trained visual dynamics representations capture the visual dynamics prior knowledge in the videos. This abstract prior knowledge can be readily adapted to downstream tasks and aligned with executable actions through online adaptation. We conduct experiments on a series of robotics visual control tasks and verify that PVDR is an effective form for pre-training with videos to promote policy learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03169v1-abstract-full').style.display = 'none'; document.getElementById('2411.03169v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02041">arXiv:2411.02041</a> <span> [<a href="https://arxiv.org/pdf/2411.02041">pdf</a>, <a href="https://arxiv.org/format/2411.02041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing ID-based Recommendation with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chen Gao</a>, <a href="/search/cs?searchtype=author&query=Du%2C+X">Xiaoyi Du</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hengliang Luo</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+D">Depeng Jin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yong Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Meng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02041v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have recently garnered significant attention in various domains, including recommendation systems. Recent research leverages the capabilities of LLMs to improve the performance and user modeling aspects of recommender systems. These studies primarily focus on utilizing LLMs to interpret textual data in recommendation tasks. However, it's worth noting that in ID-based r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02041v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02041v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have recently garnered significant attention in various domains, including recommendation systems. Recent research leverages the capabilities of LLMs to improve the performance and user modeling aspects of recommender systems. These studies primarily focus on utilizing LLMs to interpret textual data in recommendation tasks. However, it's worth noting that in ID-based recommendations, textual data is absent, and only ID data is available. The untapped potential of LLMs for ID data within the ID-based recommendation paradigm remains relatively unexplored. To this end, we introduce a pioneering approach called "LLM for ID-based Recommendation" (LLM4IDRec). This innovative approach integrates the capabilities of LLMs while exclusively relying on ID data, thus diverging from the previous reliance on textual data. The basic idea of LLM4IDRec is that by employing LLM to augment ID data, if augmented ID data can improve recommendation performance, it demonstrates the ability of LLM to interpret ID data effectively, exploring an innovative way for the integration of LLM in ID-based recommendation. We evaluate the effectiveness of our LLM4IDRec approach using three widely-used datasets. Our results demonstrate a notable improvement in recommendation performance, with our approach consistently outperforming existing methods in ID-based recommendation by solely augmenting input data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02041v1-abstract-full').style.display = 'none'; document.getElementById('2411.02041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24039">arXiv:2410.24039</a> <span> [<a href="https://arxiv.org/pdf/2410.24039">pdf</a>, <a href="https://arxiv.org/format/2410.24039">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Efficient Satellite-Ground Interconnection Design for Low-orbit Mega-Constellation Topology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenhao Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiazhi Wu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Quanwei Lin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Handong Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+K">Kun Qiu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhe Chen</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yue Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.24039v1-abstract-short" style="display: inline;"> The low-orbit mega-constellation network (LMCN) is an important part of the space-air-ground integrated network system. An effective satellite-ground interconnection design can result in a stable constellation topology for LMCNs. A naive solution is accessing the satellite with the longest remaining service time (LRST), which is widely used in previous designs. The Coordinated Satellite-Ground Int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24039v1-abstract-full').style.display = 'inline'; document.getElementById('2410.24039v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.24039v1-abstract-full" style="display: none;"> The low-orbit mega-constellation network (LMCN) is an important part of the space-air-ground integrated network system. An effective satellite-ground interconnection design can result in a stable constellation topology for LMCNs. A naive solution is accessing the satellite with the longest remaining service time (LRST), which is widely used in previous designs. The Coordinated Satellite-Ground Interconnecting (CSGI), the state-of-the-art algorithm, coordinates the establishment of ground-satellite links (GSLs). Compared with existing solutions, it reduces latency by 19% and jitter by 70% on average. However, CSGI only supports the scenario where terminals access only one satellite and cannot fully utilize the multi-access capabilities of terminals. Additionally, CSGI's high computational complexity poses deployment challenges. To overcome these problems, we propose the Classification-based Longest Remaining Service Time (C-LRST) algorithm. C-LRST supports the actual scenario with multi-access capabilities. It adds optional paths during routing with low computational complexity, improving end-to-end communications quality. We conduct our 1000s simulation from Brazil to Lithuania on the open-source platform Hypatia. Experiment results show that compared with CSGI, C-LRST reduces the latency and increases the throughput by approximately 60% and 40%, respectively. In addition, C-LRST's GSL switching number is 14, whereas CSGI is 23. C-LRST has better link stability than CSGI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24039v1-abstract-full').style.display = 'none'; document.getElementById('2410.24039v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23857">arXiv:2410.23857</a> <span> [<a href="https://arxiv.org/pdf/2410.23857">pdf</a>, <a href="https://arxiv.org/format/2410.23857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ECDQC: Efficient Compilation for Distributed Quantum Computing with Linear Layout </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kecheng Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yidong Zhou</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haochen Luo</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+L">Lingjun Xiong</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuchen Zhu</a>, <a href="/search/cs?searchtype=author&query=Casey%2C+E">Eilis Casey</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+J">Jinglei Cheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S+Y">Samuel Yen-Chi Chen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Z">Zhiding Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23857v2-abstract-short" style="display: inline;"> In this paper, we propose an efficient compilation method for distributed quantum computing (DQC) using the Linear Nearest Neighbor (LNN) architecture. By exploiting the LNN topology's symmetry, we optimize quantum circuit compilation for High Local Connectivity, Sparse Full Connectivity (HLC-SFC) algorithms like Quantum Approximate Optimization Algorithm (QAOA) and Quantum Fourier Transform (QFT)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23857v2-abstract-full').style.display = 'inline'; document.getElementById('2410.23857v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23857v2-abstract-full" style="display: none;"> In this paper, we propose an efficient compilation method for distributed quantum computing (DQC) using the Linear Nearest Neighbor (LNN) architecture. By exploiting the LNN topology's symmetry, we optimize quantum circuit compilation for High Local Connectivity, Sparse Full Connectivity (HLC-SFC) algorithms like Quantum Approximate Optimization Algorithm (QAOA) and Quantum Fourier Transform (QFT). We also utilize dangling qubits to minimize non-local interactions and reduce SWAP gates. Our approach significantly decreases compilation time, gate count, and circuit depth, improving scalability and robustness for large-scale quantum computations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23857v2-abstract-full').style.display = 'none'; document.getElementById('2410.23857v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18415">arXiv:2410.18415</a> <span> [<a href="https://arxiv.org/pdf/2410.18415">pdf</a>, <a href="https://arxiv.org/format/2410.18415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Decoding on Graphs: Faithful and Sound Reasoning on Knowledge Graphs through Generation of Well-Formed Chains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianhua Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongyin Luo</a>, <a href="/search/cs?searchtype=author&query=Glass%2C+J">James Glass</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18415v1-abstract-short" style="display: inline;"> Knowledge Graphs (KGs) can serve as reliable knowledge sources for question answering (QA) due to their structured representation of knowledge. Existing research on the utilization of KG for large language models (LLMs) prevalently relies on subgraph retriever or iterative prompting, overlooking the potential synergy of LLMs' step-wise reasoning capabilities and KGs' structural nature. In this pap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18415v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18415v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18415v1-abstract-full" style="display: none;"> Knowledge Graphs (KGs) can serve as reliable knowledge sources for question answering (QA) due to their structured representation of knowledge. Existing research on the utilization of KG for large language models (LLMs) prevalently relies on subgraph retriever or iterative prompting, overlooking the potential synergy of LLMs' step-wise reasoning capabilities and KGs' structural nature. In this paper, we present DoG (Decoding on Graphs), a novel framework that facilitates a deep synergy between LLMs and KGs. We first define a concept, well-formed chain, which consists of a sequence of interrelated fact triplets on the KGs, starting from question entities and leading to answers. We argue that this concept can serve as a principle for making faithful and sound reasoning for KGQA. To enable LLMs to generate well-formed chains, we propose graph-aware constrained decoding, in which a constraint derived from the topology of the KG regulates the decoding process of the LLMs. This constrained decoding method ensures the generation of well-formed chains while making full use of the step-wise reasoning capabilities of LLMs. Based on the above, DoG, a training-free approach, is able to provide faithful and sound reasoning trajectories grounded on the KGs. Experiments across various KGQA tasks with different background KGs demonstrate that DoG achieves superior and robust performance. DoG also shows general applicability with various open-source LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18415v1-abstract-full').style.display = 'none'; document.getElementById('2410.18415v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository