CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 462 results for author: <span class="mathjax">Guo, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Guo%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Guo, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Guo%2C+W&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Guo, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Guo%2C+W&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10392">arXiv:2502.10392</a> <span> [<a href="https://arxiv.org/pdf/2502.10392">pdf</a>, <a href="https://arxiv.org/format/2502.10392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenxuan Guo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiuwei Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziwei Wang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+J">Jianjiang Feng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jie Zhou</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiwen Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10392v1-abstract-short" style="display: inline;"> In this paper, we propose an efficient multi-level convolution architecture for 3D visual grounding. Conventional methods are difficult to meet the requirements of real-time inference due to the two-stage or point-based architecture. Inspired by the success of multi-level fully sparse convolutional architecture in 3D object detection, we aim to build a new 3D visual grounding framework following t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10392v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10392v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10392v1-abstract-full" style="display: none;"> In this paper, we propose an efficient multi-level convolution architecture for 3D visual grounding. Conventional methods are difficult to meet the requirements of real-time inference due to the two-stage or point-based architecture. Inspired by the success of multi-level fully sparse convolutional architecture in 3D object detection, we aim to build a new 3D visual grounding framework following this technical route. However, as in 3D visual grounding task the 3D scene representation should be deeply interacted with text features, sparse convolution-based architecture is inefficient for this interaction due to the large amount of voxel features. To this end, we propose text-guided pruning (TGP) and completion-based addition (CBA) to deeply fuse 3D scene representation and text features in an efficient way by gradual region pruning and target completion. Specifically, TGP iteratively sparsifies the 3D scene representation and thus efficiently interacts the voxel features with text features by cross-attention. To mitigate the affect of pruning on delicate geometric information, CBA adaptively fixes the over-pruned region by voxel completion with negligible computational overhead. Compared with previous single-stage methods, our method achieves top inference speed and surpasses previous fastest method by 100\% FPS. Our method also achieves state-of-the-art accuracy even compared with two-stage methods, with $+1.13$ lead of Acc@0.5 on ScanRefer, and $+2.6$ and $+3.2$ leads on NR3D and SR3D respectively. The code is available at \href{https://github.com/GWxuan/TSP3D}{https://github.com/GWxuan/TSP3D}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10392v1-abstract-full').style.display = 'none'; document.getElementById('2502.10392v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05761">arXiv:2502.05761</a> <span> [<a href="https://arxiv.org/pdf/2502.05761">pdf</a>, <a href="https://arxiv.org/format/2502.05761">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> 3CAD: A Large-Scale Real-World 3C Product Dataset for Unsupervised Anomaly </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+E">Enquan Yang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+P">Peng Xing</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hanyang Sun</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yuanwei Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zechao Li</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+D">Dan Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05761v1-abstract-short" style="display: inline;"> Industrial anomaly detection achieves progress thanks to datasets such as MVTec-AD and VisA. However, they suf- fer from limitations in terms of the number of defect sam- ples, types of defects, and availability of real-world scenes. These constraints inhibit researchers from further exploring the performance of industrial detection with higher accuracy. To this end, we propose a new large-scale a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05761v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05761v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05761v1-abstract-full" style="display: none;"> Industrial anomaly detection achieves progress thanks to datasets such as MVTec-AD and VisA. However, they suf- fer from limitations in terms of the number of defect sam- ples, types of defects, and availability of real-world scenes. These constraints inhibit researchers from further exploring the performance of industrial detection with higher accuracy. To this end, we propose a new large-scale anomaly detection dataset called 3CAD, which is derived from real 3C produc- tion lines. Specifically, the proposed 3CAD includes eight different types of manufactured parts, totaling 27,039 high- resolution images labeled with pixel-level anomalies. The key features of 3CAD are that it covers anomalous regions of different sizes, multiple anomaly types, and the possibility of multiple anomalous regions and multiple anomaly types per anomaly image. This is the largest and first anomaly de- tection dataset dedicated to 3C product quality control for community exploration and development. Meanwhile, we in- troduce a simple yet effective framework for unsupervised anomaly detection: a Coarse-to-Fine detection paradigm with Recovery Guidance (CFRG). To detect small defect anoma- lies, the proposed CFRG utilizes a coarse-to-fine detection paradigm. Specifically, we utilize a heterogeneous distilla- tion model for coarse localization and then fine localiza- tion through a segmentation model. In addition, to better capture normal patterns, we introduce recovery features as guidance. Finally, we report the results of our CFRG frame- work and popular anomaly detection methods on the 3CAD dataset, demonstrating strong competitiveness and providing a highly challenging benchmark to promote the development of the anomaly detection field. Data and code are available: https://github.com/EnquanYang2022/3CAD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05761v1-abstract-full').style.display = 'none'; document.getElementById('2502.05761v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accept by AAAI2025, github: https://github.com/EnquanYang2022/3CAD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05174">arXiv:2502.05174</a> <span> [<a href="https://arxiv.org/pdf/2502.05174">pdf</a>, <a href="https://arxiv.org/format/2502.05174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MELON: Indirect Prompt Injection Defense via Masked Re-execution and Tool Comparison </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+K">Kaijie Zhu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xianjun Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jindong Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W+Y">William Yang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05174v1-abstract-short" style="display: inline;"> Recent research has explored that LLM agents are vulnerable to indirect prompt injection (IPI) attacks, where malicious tasks embedded in tool-retrieved information can redirect the agent to take unauthorized actions. Existing defenses against IPI have significant limitations: either require essential model training resources, lack effectiveness against sophisticated attacks, or harm the normal ut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05174v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05174v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05174v1-abstract-full" style="display: none;"> Recent research has explored that LLM agents are vulnerable to indirect prompt injection (IPI) attacks, where malicious tasks embedded in tool-retrieved information can redirect the agent to take unauthorized actions. Existing defenses against IPI have significant limitations: either require essential model training resources, lack effectiveness against sophisticated attacks, or harm the normal utilities. We present MELON (Masked re-Execution and TooL comparisON), a novel IPI defense. Our approach builds on the observation that under a successful attack, the agent's next action becomes less dependent on user tasks and more on malicious tasks. Following this, we design MELON to detect attacks by re-executing the agent's trajectory with a masked user prompt modified through a masking function. We identify an attack if the actions generated in the original and masked executions are similar. We also include three key designs to reduce the potential false positives and false negatives. Extensive evaluation on the IPI benchmark AgentDojo demonstrates that MELON outperforms SOTA defenses in both attack prevention and utility preservation. Moreover, we show that combining MELON with a SOTA prompt augmentation defense (denoted as MELON-Aug) further improves its performance. We also conduct a detailed ablation study to validate our key designs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05174v1-abstract-full').style.display = 'none'; document.getElementById('2502.05174v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04575">arXiv:2502.04575</a> <span> [<a href="https://arxiv.org/pdf/2502.04575">pdf</a>, <a href="https://arxiv.org/ps/2502.04575">ps</a>, <a href="https://arxiv.org/format/2502.04575">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> </div> <p class="title is-5 mathjax"> Complexity Analysis of Normalizing Constant Estimation: from Jarzynski Equality to Annealed Importance Sampling and beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+M">Molei Tao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yongxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04575v1-abstract-short" style="display: inline;"> Given an unnormalized probability density $蟺\propto\mathrm{e}^{-V}$, estimating its normalizing constant $Z=\int_{\mathbb{R}^d}\mathrm{e}^{-V(x)}\mathrm{d}x$ or free energy $F=-\log Z$ is a crucial problem in Bayesian statistics, statistical mechanics, and machine learning. It is challenging especially in high dimensions or when $蟺$ is multimodal. To mitigate the high variance of conventional impo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04575v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04575v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04575v1-abstract-full" style="display: none;"> Given an unnormalized probability density $蟺\propto\mathrm{e}^{-V}$, estimating its normalizing constant $Z=\int_{\mathbb{R}^d}\mathrm{e}^{-V(x)}\mathrm{d}x$ or free energy $F=-\log Z$ is a crucial problem in Bayesian statistics, statistical mechanics, and machine learning. It is challenging especially in high dimensions or when $蟺$ is multimodal. To mitigate the high variance of conventional importance sampling estimators, annealing-based methods such as Jarzynski equality and annealed importance sampling are commonly adopted, yet their quantitative complexity guarantees remain largely unexplored. We take a first step toward a non-asymptotic analysis of annealed importance sampling. In particular, we derive an oracle complexity of $\widetilde{O}\left(\frac{d尾^2{\mathcal{A}}^2}{\varepsilon^4}\right)$ for estimating $Z$ within $\varepsilon$ relative error with high probability, where $尾$ is the smoothness of $V$ and $\mathcal{A}$ denotes the action of a curve of probability measures interpolating $蟺$ and a tractable reference distribution. Our analysis, leveraging Girsanov theorem and optimal transport, does not explicitly require isoperimetric assumptions on the target distribution. Finally, to tackle the large action of the widely used geometric interpolation of probability distributions, we propose a new normalizing constant estimation algorithm based on reverse diffusion samplers and establish a framework for analyzing its complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04575v1-abstract-full').style.display = 'none'; document.getElementById('2502.04575v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03036">arXiv:2502.03036</a> <span> [<a href="https://arxiv.org/pdf/2502.03036">pdf</a>, <a href="https://arxiv.org/format/2502.03036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> FuXi-$伪$: Scaling Recommendation Model with Feature Interaction Enhanced Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yufei Ye</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Chin%2C+J+Y">Jin Yao Chin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hong Zhu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xi Lin</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yuyang Ye</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03036v1-abstract-short" style="display: inline;"> Inspired by scaling laws and large language models, research on large-scale recommendation models has gained significant attention. Recent advancements have shown that expanding sequential recommendation models to large-scale recommendation models can be an effective strategy. Current state-of-the-art sequential recommendation models primarily use self-attention mechanisms for explicit feature int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03036v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03036v1-abstract-full" style="display: none;"> Inspired by scaling laws and large language models, research on large-scale recommendation models has gained significant attention. Recent advancements have shown that expanding sequential recommendation models to large-scale recommendation models can be an effective strategy. Current state-of-the-art sequential recommendation models primarily use self-attention mechanisms for explicit feature interactions among items, while implicit interactions are managed through Feed-Forward Networks (FFNs). However, these models often inadequately integrate temporal and positional information, either by adding them to attention weights or by blending them with latent representations, which limits their expressive power. A recent model, HSTU, further reduces the focus on implicit feature interactions, constraining its performance. We propose a new model called FuXi-$伪$ to address these issues. This model introduces an Adaptive Multi-channel Self-attention mechanism that distinctly models temporal, positional, and semantic features, along with a Multi-stage FFN to enhance implicit feature interactions. Our offline experiments demonstrate that our model outperforms existing models, with its performance continuously improving as the model size increases. Additionally, we conducted an online A/B test within the Huawei Music app, which showed a $4.76\%$ increase in the average number of songs played per user and a $5.10\%$ increase in the average listening duration per user. Our code has been released at https://github.com/USTC-StarTeam/FuXi-alpha. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03036v1-abstract-full').style.display = 'none'; document.getElementById('2502.03036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02747">arXiv:2502.02747</a> <span> [<a href="https://arxiv.org/pdf/2502.02747">pdf</a>, <a href="https://arxiv.org/format/2502.02747">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> PatchPilot: A Stable and Cost-Efficient Agentic Patching Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongwei Li</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yuheng Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiqi Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02747v1-abstract-short" style="display: inline;"> Recent research builds various patching agents that combine large language models (LLMs) with non-ML tools and achieve promising results on the state-of-the-art (SOTA) software patching benchmark, SWE-Bench. Based on how to determine the patching workflows, existing patching agents can be categorized as agent-based planning methods, which rely on LLMs for planning, and human-based planning methods… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02747v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02747v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02747v1-abstract-full" style="display: none;"> Recent research builds various patching agents that combine large language models (LLMs) with non-ML tools and achieve promising results on the state-of-the-art (SOTA) software patching benchmark, SWE-Bench. Based on how to determine the patching workflows, existing patching agents can be categorized as agent-based planning methods, which rely on LLMs for planning, and human-based planning methods, which follow a pre-defined workflow. At a high level, agent-based planning methods achieve high patching performance but with a high cost and limited stability. Human-based planning methods, on the other hand, are more stable and efficient but have key workflow limitations that compromise their patching performance. In this paper, we propose PatchPilot, an agentic patcher that strikes a balance between patching efficacy, stability, and cost-efficiency. PatchPilot proposes a novel human-based planning workflow with five components: reproduction, localization, generation, validation, and refinement (where refinement is unique to PatchPilot). We introduce novel and customized designs to each component to optimize their effectiveness and efficiency. Through extensive experiments on the SWE-Bench benchmarks, PatchPilot shows a superior performance than existing open-source methods while maintaining low cost (less than 1$ per instance) and ensuring higher stability. We also conduct a detailed ablation study to validate the key designs in each component. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02747v1-abstract-full').style.display = 'none'; document.getElementById('2502.02747v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00234">arXiv:2502.00234</a> <span> [<a href="https://arxiv.org/pdf/2502.00234">pdf</a>, <a href="https://arxiv.org/format/2502.00234">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Fast Solvers for Discrete Diffusion Models: Theory and Applications of High-Order Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yinuo Ren</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haoxuan Chen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuchen Zhu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yongxin Chen</a>, <a href="/search/cs?searchtype=author&query=Rotskoff%2C+G+M">Grant M. Rotskoff</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+M">Molei Tao</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+L">Lexing Ying</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00234v1-abstract-short" style="display: inline;"> Discrete diffusion models have emerged as a powerful generative modeling framework for discrete data with successful applications spanning from text generation to image synthesis. However, their deployment faces challenges due to the high dimensionality of the state space, necessitating the development of efficient inference algorithms. Current inference approaches mainly fall into two categories:… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00234v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00234v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00234v1-abstract-full" style="display: none;"> Discrete diffusion models have emerged as a powerful generative modeling framework for discrete data with successful applications spanning from text generation to image synthesis. However, their deployment faces challenges due to the high dimensionality of the state space, necessitating the development of efficient inference algorithms. Current inference approaches mainly fall into two categories: exact simulation and approximate methods such as $蟿$-leaping. While exact methods suffer from unpredictable inference time and redundant function evaluations, $蟿$-leaping is limited by its first-order accuracy. In this work, we advance the latter category by tailoring the first extension of high-order numerical inference schemes to discrete diffusion models, enabling larger step sizes while reducing error. We rigorously analyze the proposed schemes and establish the second-order accuracy of the $胃$-trapezoidal method in KL divergence. Empirical evaluations on GPT-2 level text and ImageNet-level image generation tasks demonstrate that our method achieves superior sample quality compared to existing approaches under equivalent computational constraints. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00234v1-abstract-full').style.display = 'none'; document.getElementById('2502.00234v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16607">arXiv:2501.16607</a> <span> [<a href="https://arxiv.org/pdf/2501.16607">pdf</a>, <a href="https://arxiv.org/format/2501.16607">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> MCTS-SQL: An Effective Framework for Text-to-SQL with Monte Carlo Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+S">Shuozhi Yuan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liming Chen</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Miaomiao Yuan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jin Zhao</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Haoran Peng</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenming Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16607v1-abstract-short" style="display: inline;"> Text-to-SQL is a fundamental and longstanding problem in the NLP area, aiming at converting natural language queries into SQL, enabling non-expert users to operate databases. Recent advances in LLM have greatly improved text-to-SQL performance. However, challenges persist, especially when dealing with complex user queries. Current approaches (e.g., COT prompting and multi-agent frameworks) rely on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16607v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16607v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16607v1-abstract-full" style="display: none;"> Text-to-SQL is a fundamental and longstanding problem in the NLP area, aiming at converting natural language queries into SQL, enabling non-expert users to operate databases. Recent advances in LLM have greatly improved text-to-SQL performance. However, challenges persist, especially when dealing with complex user queries. Current approaches (e.g., COT prompting and multi-agent frameworks) rely on the ability of models to plan and generate SQL autonomously, but controlling performance remains difficult. In addition, LLMs are still prone to hallucinations. To alleviate these challenges, we designed a novel MCTS-SQL to guide SQL generation iteratively. The approach generates SQL queries through Monte Carlo Tree Search (MCTS) and a heuristic self-refinement mechanism are used to enhance accuracy and reliability. Key components include a schema selector for extracting relevant information and an MCTS-based generator for iterative query refinement. Experimental results from the SPIDER and BIRD benchmarks show that MCTS-SQL achieves state-of-the-art performance. Specifically, on the BIRD development dataset, MCTS-SQL achieves an Execution (EX) accuracy of 69.40% using GPT-4o as the base model and a significant improvement when dealing with challenging tasks, with an EX of 51.48%, which is 3.41% higher than the existing method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16607v1-abstract-full').style.display = 'none'; document.getElementById('2501.16607v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06912">arXiv:2501.06912</a> <span> [<a href="https://arxiv.org/pdf/2501.06912">pdf</a>, <a href="https://arxiv.org/format/2501.06912">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Efficient Phishing URL Detection Using Graph-based Machine Learning and Loopy Belief Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenye Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qun Wang</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+H">Hao Yue</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Haijian Sun</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+R+Q">Rose Qingyang Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06912v1-abstract-short" style="display: inline;"> The proliferation of mobile devices and online interactions have been threatened by different cyberattacks, where phishing attacks and malicious Uniform Resource Locators (URLs) pose significant risks to user security. Traditional phishing URL detection methods primarily rely on URL string-based features, which attackers often manipulate to evade detection. To address these limitations, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06912v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06912v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06912v1-abstract-full" style="display: none;"> The proliferation of mobile devices and online interactions have been threatened by different cyberattacks, where phishing attacks and malicious Uniform Resource Locators (URLs) pose significant risks to user security. Traditional phishing URL detection methods primarily rely on URL string-based features, which attackers often manipulate to evade detection. To address these limitations, we propose a novel graph-based machine learning model for phishing URL detection, integrating both URL structure and network-level features such as IP addresses and authoritative name servers. Our approach leverages Loopy Belief Propagation (LBP) with an enhanced convergence strategy to enable effective message passing and stable classification in the presence of complex graph structures. Additionally, we introduce a refined edge potential mechanism that dynamically adapts based on entity similarity and label relationships to further improve classification accuracy. Comprehensive experiments on real-world datasets demonstrate our model's effectiveness by achieving F1 score of up to 98.77\%. This robust and reproducible method advances phishing detection capabilities, offering enhanced reliability and valuable insights in the field of cybersecurity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06912v1-abstract-full').style.display = 'none'; document.getElementById('2501.06912v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19877">arXiv:2412.19877</a> <span> [<a href="https://arxiv.org/pdf/2412.19877">pdf</a>, <a href="https://arxiv.org/format/2412.19877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Image Classification with Deep Reinforcement Active Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiu%2C+M">Mingyuan Jiu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xuguang Song</a>, <a href="/search/cs?searchtype=author&query=Sahbi%2C+H">Hichem Sahbi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shupan Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yan Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+L">Lihua Guo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Mingliang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19877v1-abstract-short" style="display: inline;"> Deep learning is currently reaching outstanding performances on different tasks, including image classification, especially when using large neural networks. The success of these models is tributary to the availability of large collections of labeled training data. In many real-world scenarios, labeled data are scarce, and their hand-labeling is time, effort and cost demanding. Active learning is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19877v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19877v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19877v1-abstract-full" style="display: none;"> Deep learning is currently reaching outstanding performances on different tasks, including image classification, especially when using large neural networks. The success of these models is tributary to the availability of large collections of labeled training data. In many real-world scenarios, labeled data are scarce, and their hand-labeling is time, effort and cost demanding. Active learning is an alternative paradigm that mitigates the effort in hand-labeling data, where only a small fraction is iteratively selected from a large pool of unlabeled data, and annotated by an expert (a.k.a oracle), and eventually used to update the learning models. However, existing active learning solutions are dependent on handcrafted strategies that may fail in highly variable learning environments (datasets, scenarios, etc). In this work, we devise an adaptive active learning method based on Markov Decision Process (MDP). Our framework leverages deep reinforcement learning and active learning together with a Deep Deterministic Policy Gradient (DDPG) in order to dynamically adapt sample selection strategies to the oracle's feedback and the learning environment. Extensive experiments conducted on three different image classification benchmarks show superior performances against several existing active learning strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19877v1-abstract-full').style.display = 'none'; document.getElementById('2412.19877v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16946">arXiv:2412.16946</a> <span> [<a href="https://arxiv.org/pdf/2412.16946">pdf</a>, <a href="https://arxiv.org/format/2412.16946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Video Domain Incremental Learning for Human Action Recognition in Home Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yuanda Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xing Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Meiying Li</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yate Ge</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xiaohua Sun</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weiwei Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16946v1-abstract-short" style="display: inline;"> It is significantly challenging to recognize daily human actions in homes due to the diversity and dynamic changes in unconstrained home environments. It spurs the need to continually adapt to various users and scenes. Fine-tuning current video understanding models on newly encountered domains often leads to catastrophic forgetting, where the models lose their ability to perform well on previously… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16946v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16946v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16946v1-abstract-full" style="display: none;"> It is significantly challenging to recognize daily human actions in homes due to the diversity and dynamic changes in unconstrained home environments. It spurs the need to continually adapt to various users and scenes. Fine-tuning current video understanding models on newly encountered domains often leads to catastrophic forgetting, where the models lose their ability to perform well on previously learned scenarios. To address this issue, we formalize the problem of Video Domain Incremental Learning (VDIL), which enables models to learn continually from different domains while maintaining a fixed set of action classes. Existing continual learning research primarily focuses on class-incremental learning, while the domain incremental learning has been largely overlooked in video understanding. In this work, we introduce a novel benchmark of domain incremental human action recognition for unconstrained home environments. We design three domain split types (user, scene, hybrid) to systematically assess the challenges posed by domain shifts in real-world home settings. Furthermore, we propose a baseline learning strategy based on replay and reservoir sampling techniques without domain labels to handle scenarios with limited memory and task agnosticism. Extensive experimental results demonstrate that our simple sampling and replay strategy outperforms most existing continual learning methods across the three proposed benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16946v1-abstract-full').style.display = 'none'; document.getElementById('2412.16946v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12596">arXiv:2412.12596</a> <span> [<a href="https://arxiv.org/pdf/2412.12596">pdf</a>, <a href="https://arxiv.org/format/2412.12596">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> OpenViewer: Openness-Aware Multi-View Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+S">Shide Du</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zihan Fang</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yanchao Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changwei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiping Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenzhong Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12596v1-abstract-short" style="display: inline;"> Multi-view learning methods leverage multiple data sources to enhance perception by mining correlations across views, typically relying on predefined categories. However, deploying these models in real-world scenarios presents two primary openness challenges. 1) Lack of Interpretability: The integration mechanisms of multi-view data in existing black-box models remain poorly explained; 2) Insuffic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12596v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12596v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12596v1-abstract-full" style="display: none;"> Multi-view learning methods leverage multiple data sources to enhance perception by mining correlations across views, typically relying on predefined categories. However, deploying these models in real-world scenarios presents two primary openness challenges. 1) Lack of Interpretability: The integration mechanisms of multi-view data in existing black-box models remain poorly explained; 2) Insufficient Generalization: Most models are not adapted to multi-view scenarios involving unknown categories. To address these challenges, we propose OpenViewer, an openness-aware multi-view learning framework with theoretical support. This framework begins with a Pseudo-Unknown Sample Generation Mechanism to efficiently simulate open multi-view environments and previously adapt to potential unknown samples. Subsequently, we introduce an Expression-Enhanced Deep Unfolding Network to intuitively promote interpretability by systematically constructing functional prior-mapping modules and effectively providing a more transparent integration mechanism for multi-view data. Additionally, we establish a Perception-Augmented Open-Set Training Regime to significantly enhance generalization by precisely boosting confidences for known categories and carefully suppressing inappropriate confidences for unknown ones. Experimental results demonstrate that OpenViewer effectively addresses openness challenges while ensuring recognition performance for both known and unknown samples. The code is released at https://github.com/dushide/OpenViewer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12596v1-abstract-full').style.display = 'none'; document.getElementById('2412.12596v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11109">arXiv:2412.11109</a> <span> [<a href="https://arxiv.org/pdf/2412.11109">pdf</a>, <a href="https://arxiv.org/format/2412.11109">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> SpearBot: Leveraging Large Language Models in a Generative-Critique Framework for Spear-Phishing Email Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+Q">Qinglin Qi</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yun Luo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yijia Xu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yong Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11109v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are increasingly capable, aiding in tasks such as content generation, yet they also pose risks, particularly in generating harmful spear-phishing emails. These emails, crafted to entice clicks on malicious URLs, threaten personal information security. This paper proposes an adversarial framework, SpearBot, which utilizes LLMs to generate spear-phishing emails with vari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11109v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11109v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11109v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are increasingly capable, aiding in tasks such as content generation, yet they also pose risks, particularly in generating harmful spear-phishing emails. These emails, crafted to entice clicks on malicious URLs, threaten personal information security. This paper proposes an adversarial framework, SpearBot, which utilizes LLMs to generate spear-phishing emails with various phishing strategies. Through specifically crafted jailbreak prompts, SpearBot circumvents security policies and introduces other LLM instances as critics. When a phishing email is identified by the critic, SpearBot refines the generated email based on the critique feedback until it can no longer be recognized as phishing, thereby enhancing its deceptive quality. To evaluate the effectiveness of SpearBot, we implement various machine-based defenders and assess how well the phishing emails generated could deceive them. Results show these emails often evade detection to a large extent, underscoring their deceptive quality. Additionally, human evaluations of the emails' readability and deception are conducted through questionnaires, confirming their convincing nature and the significant potential harm of the generated phishing emails. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11109v1-abstract-full').style.display = 'none'; document.getElementById('2412.11109v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11080">arXiv:2412.11080</a> <span> [<a href="https://arxiv.org/pdf/2412.11080">pdf</a>, <a href="https://arxiv.org/format/2412.11080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Spectral Clustering via Joint Spectral Embedding and Kmeans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wengang Guo</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+W">Wei Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11080v1-abstract-short" style="display: inline;"> Spectral clustering is a popular clustering method. It first maps data into the spectral embedding space and then uses Kmeans to find clusters. However, the two decoupled steps prohibit joint optimization for the optimal solution. In addition, it needs to construct the similarity graph for samples, which suffers from the curse of dimensionality when the data are high-dimensional. To address these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11080v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11080v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11080v1-abstract-full" style="display: none;"> Spectral clustering is a popular clustering method. It first maps data into the spectral embedding space and then uses Kmeans to find clusters. However, the two decoupled steps prohibit joint optimization for the optimal solution. In addition, it needs to construct the similarity graph for samples, which suffers from the curse of dimensionality when the data are high-dimensional. To address these two challenges, we introduce \textbf{D}eep \textbf{S}pectral \textbf{C}lustering (\textbf{DSC}), which consists of two main modules: the spectral embedding module and the greedy Kmeans module. The former module learns to efficiently embed raw samples into the spectral embedding space using deep neural networks and power iteration. The latter module improves the cluster structures of Kmeans on the learned spectral embeddings by a greedy optimization strategy, which iteratively reveals the direction of the worst cluster structures and optimizes embeddings in this direction. To jointly optimize spectral embeddings and clustering, we seamlessly integrate the two modules and optimize them in an end-to-end manner. Experimental results on seven real-world datasets demonstrate that DSC achieves state-of-the-art clustering performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11080v1-abstract-full').style.display = 'none'; document.getElementById('2412.11080v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09195">arXiv:2412.09195</a> <span> [<a href="https://arxiv.org/pdf/2412.09195">pdf</a>, <a href="https://arxiv.org/format/2412.09195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> On the Generation and Removal of Speaker Adversarial Perturbation for Voice-Privacy Protection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chenyang Guo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liping Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuhai Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+A">Kong Aik Lee</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zhen-Hua Ling</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wu Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09195v1-abstract-short" style="display: inline;"> Neural networks are commonly known to be vulnerable to adversarial attacks mounted through subtle perturbation on the input data. Recent development in voice-privacy protection has shown the positive use cases of the same technique to conceal speaker's voice attribute with additive perturbation signal generated by an adversarial network. This paper examines the reversibility property where an enti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09195v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09195v1-abstract-full" style="display: none;"> Neural networks are commonly known to be vulnerable to adversarial attacks mounted through subtle perturbation on the input data. Recent development in voice-privacy protection has shown the positive use cases of the same technique to conceal speaker's voice attribute with additive perturbation signal generated by an adversarial network. This paper examines the reversibility property where an entity generating the adversarial perturbations is authorized to remove them and restore original speech (e.g., the speaker him/herself). A similar technique could also be used by an investigator to deanonymize a voice-protected speech to restore criminals' identities in security and forensic analysis. In this setting, the perturbation generative module is assumed to be known in the removal process. To this end, a joint training of perturbation generation and removal modules is proposed. Experimental results on the LibriSpeech dataset demonstrated that the subtle perturbations added to the original speech can be predicted from the anonymized speech while achieving the goal of privacy protection. By removing these perturbations from the anonymized sample, the original speech can be restored. Audio samples can be found in \url{https://voiceprivacy.github.io/Perturbation-Generation-Removal/}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09195v1-abstract-full').style.display = 'none'; document.getElementById('2412.09195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, published to IEEE SLT Workshop 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2024 IEEE Spoken Language Technology Workshop (SLT), 2024, pp. 1197-1202 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07011">arXiv:2412.07011</a> <span> [<a href="https://arxiv.org/pdf/2412.07011">pdf</a>, <a href="https://arxiv.org/format/2412.07011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Multi-Objective Communication Optimization for Temporal Continuity in Dynamic Vehicular Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wuzhao Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lun Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07011v1-abstract-short" style="display: inline;"> Vehicular Ad-hoc Networks (VANETs) operate in highly dynamic environments characterized by high mobility, time-varying channel conditions, and frequent network disruptions. Addressing these challenges, this paper presents a novel temporal-aware multi-objective robust optimization framework, which for the first time formally incorporates temporal continuity into the optimization of dynamic multi-ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07011v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07011v1-abstract-full" style="display: none;"> Vehicular Ad-hoc Networks (VANETs) operate in highly dynamic environments characterized by high mobility, time-varying channel conditions, and frequent network disruptions. Addressing these challenges, this paper presents a novel temporal-aware multi-objective robust optimization framework, which for the first time formally incorporates temporal continuity into the optimization of dynamic multi-hop VANETs. The proposed framework simultaneously optimizes communication delay, throughput, and reliability, ensuring stable and consistent communication paths under rapidly changing conditions. A robust optimization model is formulated to mitigate performance degradation caused by uncertainties in vehicular density and channel fluctuations. To solve the optimization problem, an enhanced Non-dominated Sorting Genetic Algorithm II (NSGA-II) is developed, integrating dynamic encoding, elite inheritance, and adaptive constraint handling to efficiently balance trade-offs among conflicting objectives. Simulation results demonstrate that the proposed framework achieves significant improvements in reliability, delay reduction, and throughput enhancement, while temporal continuity effectively stabilizes communication paths over time. This work provides a pioneering and comprehensive solution for optimizing VANET communication, offering critical insights for robust and efficient strategies in intelligent transportation systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07011v1-abstract-full').style.display = 'none'; document.getElementById('2412.07011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06219">arXiv:2412.06219</a> <span> [<a href="https://arxiv.org/pdf/2412.06219">pdf</a>, <a href="https://arxiv.org/format/2412.06219">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Data Free Backdoor Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+B">Bochuan Cao</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+J">Jinyuan Jia</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chuxuan Hu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+Z">Zhen Xiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jinghui Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06219v1-abstract-short" style="display: inline;"> Backdoor attacks aim to inject a backdoor into a classifier such that it predicts any input with an attacker-chosen backdoor trigger as an attacker-chosen target class. Existing backdoor attacks require either retraining the classifier with some clean data or modifying the model's architecture. As a result, they are 1) not applicable when clean data is unavailable, 2) less efficient when the model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06219v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06219v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06219v1-abstract-full" style="display: none;"> Backdoor attacks aim to inject a backdoor into a classifier such that it predicts any input with an attacker-chosen backdoor trigger as an attacker-chosen target class. Existing backdoor attacks require either retraining the classifier with some clean data or modifying the model's architecture. As a result, they are 1) not applicable when clean data is unavailable, 2) less efficient when the model is large, and 3) less stealthy due to architecture changes. In this work, we propose DFBA, a novel retraining-free and data-free backdoor attack without changing the model architecture. Technically, our proposed method modifies a few parameters of a classifier to inject a backdoor. Through theoretical analysis, we verify that our injected backdoor is provably undetectable and unremovable by various state-of-the-art defenses under mild assumptions. Our evaluation on multiple datasets further demonstrates that our injected backdoor: 1) incurs negligible classification loss, 2) achieves 100% attack success rates, and 3) bypasses six existing state-of-the-art defenses. Moreover, our comparison with a state-of-the-art non-data-free backdoor attack shows our attack is more stealthy and effective against various defenses while achieving less classification accuracy loss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06219v1-abstract-full').style.display = 'none'; document.getElementById('2412.06219v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 8 figures, accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.05940">arXiv:2412.05940</a> <span> [<a href="https://arxiv.org/pdf/2412.05940">pdf</a>, <a href="https://arxiv.org/format/2412.05940">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Digital Modeling of Massage Techniques and Reproduction by Robotic Arms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuan Xu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kui Huang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weichao Guo</a>, <a href="/search/cs?searchtype=author&query=Du%2C+L">Leyi Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.05940v1-abstract-short" style="display: inline;"> This paper explores the digital modeling and robotic reproduction of traditional Chinese medicine (TCM) massage techniques. We adopt an adaptive admittance control algorithm to optimize force and position control, ensuring safety and comfort. The paper analyzes key TCM techniques from kinematic and dynamic perspectives, and designs robotic systems to reproduce these massage techniques. The results… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05940v1-abstract-full').style.display = 'inline'; document.getElementById('2412.05940v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.05940v1-abstract-full" style="display: none;"> This paper explores the digital modeling and robotic reproduction of traditional Chinese medicine (TCM) massage techniques. We adopt an adaptive admittance control algorithm to optimize force and position control, ensuring safety and comfort. The paper analyzes key TCM techniques from kinematic and dynamic perspectives, and designs robotic systems to reproduce these massage techniques. The results demonstrate that the robot successfully mimics the characteristics of TCM massage, providing a foundation for integrating traditional therapy with modern robotics and expanding assistive therapy applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05940v1-abstract-full').style.display = 'none'; document.getElementById('2412.05940v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.05734">arXiv:2412.05734</a> <span> [<a href="https://arxiv.org/pdf/2412.05734">pdf</a>, <a href="https://arxiv.org/format/2412.05734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PrivAgent: Agentic-based Red-teaming for LLM Privacy Leakage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nie%2C+Y">Yuzhou Nie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhun Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Ye Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xian Wu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xuandong Zhao</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.05734v1-abstract-short" style="display: inline;"> Recent studies have discovered that LLMs have serious privacy leakage concerns, where an LLM may be fooled into outputting private information under carefully crafted adversarial prompts. These risks include leaking system prompts, personally identifiable information, training data, and model parameters. Most existing red-teaming approaches for privacy leakage rely on humans to craft the adversari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05734v1-abstract-full').style.display = 'inline'; document.getElementById('2412.05734v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.05734v1-abstract-full" style="display: none;"> Recent studies have discovered that LLMs have serious privacy leakage concerns, where an LLM may be fooled into outputting private information under carefully crafted adversarial prompts. These risks include leaking system prompts, personally identifiable information, training data, and model parameters. Most existing red-teaming approaches for privacy leakage rely on humans to craft the adversarial prompts. A few automated methods are proposed for system prompt extraction, but they cannot be applied to more severe risks (e.g., training data extraction) and have limited effectiveness even for system prompt extraction. In this paper, we propose PrivAgent, a novel black-box red-teaming framework for LLM privacy leakage. We formulate different risks as a search problem with a unified attack goal. Our framework trains an open-source LLM through reinforcement learning as the attack agent to generate adversarial prompts for different target models under different risks. We propose a novel reward function to provide effective and fine-grained rewards for the attack agent. Finally, we introduce customizations to better fit our general framework to system prompt extraction and training data extraction. Through extensive evaluations, we first show that PrivAgent outperforms existing automated methods in system prompt leakage against six popular LLMs. Notably, our approach achieves a 100% success rate in extracting system prompts from real-world applications in OpenAI's GPT Store. We also show PrivAgent's effectiveness in extracting training data from an open-source LLM with a success rate of 5.9%. We further demonstrate PrivAgent's effectiveness in evading the existing guardrail defense and its helpfulness in enabling better safety alignment. Finally, we validate our customized designs through a detailed ablation study. We release our code here https://github.com/rucnyz/RedAgent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05734v1-abstract-full').style.display = 'none'; document.getElementById('2412.05734v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00714">arXiv:2412.00714</a> <span> [<a href="https://arxiv.org/pdf/2412.00714">pdf</a>, <a href="https://arxiv.org/format/2412.00714">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Scaling New Frontiers: Insights into Large Recommendation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luankang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chin%2C+J+Y">Jin Yao Chin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhongzhou Liu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+K">Kai Cheng</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Q">Qiushi Pan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y+Q">Yi Quan Lee</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wanqi Xue</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+T">Tingjia Shen</a>, <a href="/search/cs?searchtype=author&query=Song%2C+K">Kenan Song</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kefan Wang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+W">Wenjia Xie</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yuyang Ye</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huifeng Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00714v1-abstract-short" style="display: inline;"> Recommendation systems are essential for filtering data and retrieving relevant information across various applications. Recent advancements have seen these systems incorporate increasingly large embedding tables, scaling up to tens of terabytes for industrial use. However, the expansion of network parameters in traditional recommendation models has plateaued at tens of millions, limiting further… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00714v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00714v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00714v1-abstract-full" style="display: none;"> Recommendation systems are essential for filtering data and retrieving relevant information across various applications. Recent advancements have seen these systems incorporate increasingly large embedding tables, scaling up to tens of terabytes for industrial use. However, the expansion of network parameters in traditional recommendation models has plateaued at tens of millions, limiting further benefits from increased embedding parameters. Inspired by the success of large language models (LLMs), a new approach has emerged that scales network parameters using innovative structures, enabling continued performance improvements. A significant development in this area is Meta's generative recommendation model HSTU, which illustrates the scaling laws of recommendation systems by expanding parameters to thousands of billions. This new paradigm has achieved substantial performance gains in online experiments. In this paper, we aim to enhance the understanding of scaling laws by conducting comprehensive evaluations of large recommendation models. Firstly, we investigate the scaling laws across different backbone architectures of the large recommendation models. Secondly, we conduct comprehensive ablation studies to explore the origins of these scaling laws. We then further assess the performance of HSTU, as the representative of large recommendation models, on complex user behavior modeling tasks to evaluate its applicability. Notably, we also analyze its effectiveness in ranking tasks for the first time. Finally, we offer insights into future directions for large recommendation models. Supplementary materials for our research are available on GitHub at https://github.com/USTC-StarTeam/Large-Recommendation-Models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00714v1-abstract-full').style.display = 'none'; document.getElementById('2412.00714v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00430">arXiv:2412.00430</a> <span> [<a href="https://arxiv.org/pdf/2412.00430">pdf</a>, <a href="https://arxiv.org/format/2412.00430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Sequential Recommendation Models with Scaling Laws and Approximate Entropy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+T">Tingjia Shen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chuhan Wu</a>, <a href="/search/cs?searchtype=author&query=Chin%2C+J+Y">Jin Yao Chin</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huifeng Guo</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00430v5-abstract-short" style="display: inline;"> Scaling Laws have emerged as a powerful framework for understanding how model performance evolves as they increase in size, providing valuable insights for optimizing computational resources. In the realm of Sequential Recommendation (SR), which is pivotal for predicting users' sequential preferences, these laws offer a lens through which to address the challenges posed by the scalability of SR mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00430v5-abstract-full').style.display = 'inline'; document.getElementById('2412.00430v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00430v5-abstract-full" style="display: none;"> Scaling Laws have emerged as a powerful framework for understanding how model performance evolves as they increase in size, providing valuable insights for optimizing computational resources. In the realm of Sequential Recommendation (SR), which is pivotal for predicting users' sequential preferences, these laws offer a lens through which to address the challenges posed by the scalability of SR models. However, the presence of structural and collaborative issues in recommender systems prevents the direct application of the Scaling Law (SL) in these systems. In response, we introduce the Performance Law for SR models, which aims to theoretically investigate and model the relationship between model performance and data quality. Specifically, we first fit the HR and NDCG metrics to transformer-based SR models. Subsequently, we propose Approximate Entropy (ApEn) to assess data quality, presenting a more nuanced approach compared to traditional data quantity metrics. Our method enables accurate predictions across various dataset scales and model sizes, demonstrating a strong correlation in large SR models and offering insights into achieving optimal performance for any given model configuration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00430v5-abstract-full').style.display = 'none'; document.getElementById('2412.00430v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68P20 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.3.4; I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00366">arXiv:2412.00366</a> <span> [<a href="https://arxiv.org/pdf/2412.00366">pdf</a>, <a href="https://arxiv.org/format/2412.00366">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Efficient Multi-Robot Motion Planning for Manifold-Constrained Manipulators by Randomized Scheduling and Informed Path Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weihang Guo</a>, <a href="/search/cs?searchtype=author&query=Kingston%2C+Z">Zachary Kingston</a>, <a href="/search/cs?searchtype=author&query=Hang%2C+K">Kaiyu Hang</a>, <a href="/search/cs?searchtype=author&query=Kavraki%2C+L+E">Lydia E. Kavraki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00366v1-abstract-short" style="display: inline;"> Multi-robot motion planning for high degree-of-freedom manipulators in shared, constrained, and narrow spaces is a complex problem and essential for many scenarios such as construction, surgery, and more. Traditional coupled and decoupled methods either scale poorly or lack completeness, and hybrid methods that compose paths from individual robots together require the enumeration of many paths bef… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00366v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00366v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00366v1-abstract-full" style="display: none;"> Multi-robot motion planning for high degree-of-freedom manipulators in shared, constrained, and narrow spaces is a complex problem and essential for many scenarios such as construction, surgery, and more. Traditional coupled and decoupled methods either scale poorly or lack completeness, and hybrid methods that compose paths from individual robots together require the enumeration of many paths before they can find valid composite solutions. This paper introduces Scheduling to Avoid Collisions (StAC), a hybrid approach that more effectively composes paths from individual robots by scheduling (adding random stops and coordination motion along each path) and generates paths that are more likely to be feasible by using bidirectional feedback between the scheduler and motion planner for informed sampling. StAC uses 10 to 100 times fewer paths from the low-level planner than state-of-the-art baselines on challenging problems in manipulator cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00366v1-abstract-full').style.display = 'none'; document.getElementById('2412.00366v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00165">arXiv:2412.00165</a> <span> [<a href="https://arxiv.org/pdf/2412.00165">pdf</a>, <a href="https://arxiv.org/format/2412.00165">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Modelling Networked Dynamical System by Temporal Graph Neural ODE with Irregularly Partial Observed Time-series Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+M">Mengbang Zou</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00165v1-abstract-short" style="display: inline;"> Modeling the evolution of system with time-series data is a challenging and critical task in a wide range of fields, especially when the time-series data is regularly sampled and partially observable. Some methods have been proposed to estimate the hidden dynamics between intervals like Neural ODE or Exponential decay dynamic function and combine with RNN to estimate the evolution. However, it is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00165v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00165v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00165v1-abstract-full" style="display: none;"> Modeling the evolution of system with time-series data is a challenging and critical task in a wide range of fields, especially when the time-series data is regularly sampled and partially observable. Some methods have been proposed to estimate the hidden dynamics between intervals like Neural ODE or Exponential decay dynamic function and combine with RNN to estimate the evolution. However, it is difficult for these methods to capture the spatial and temporal dependencies existing within graph-structured time-series data and take full advantage of the available relational information to impute missing data and predict the future states. Besides, traditional RNN-based methods leverage shared RNN cell to update the hidden state which does not capture the impact of various intervals and missing state information on the reliability of estimating the hidden state. To solve this problem, in this paper, we propose a method embedding Graph Neural ODE with reliability and time-aware mechanism which can capture the spatial and temporal dependencies in irregularly sampled and partially observable time-series data to reconstruct the dynamics. Also, a loss function is designed considering the reliability of the augment data from the above proposed method to make further prediction. The proposed method has been validated in experiments of different networked dynamical systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00165v1-abstract-full').style.display = 'none'; document.getElementById('2412.00165v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.19635">arXiv:2411.19635</a> <span> [<a href="https://arxiv.org/pdf/2411.19635">pdf</a>, <a href="https://arxiv.org/format/2411.19635">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Build An Influential Bot In Social Media Simulations With Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+B">Bailu Jin</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.19635v1-abstract-short" style="display: inline;"> Understanding the dynamics of public opinion evolution on online social platforms is critical for analyzing influence mechanisms. Traditional approaches to influencer analysis are typically divided into qualitative assessments of personal attributes and quantitative evaluations of influence power. In this study, we introduce a novel simulated environment that combines Agent-Based Modeling (ABM) wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19635v1-abstract-full').style.display = 'inline'; document.getElementById('2411.19635v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.19635v1-abstract-full" style="display: none;"> Understanding the dynamics of public opinion evolution on online social platforms is critical for analyzing influence mechanisms. Traditional approaches to influencer analysis are typically divided into qualitative assessments of personal attributes and quantitative evaluations of influence power. In this study, we introduce a novel simulated environment that combines Agent-Based Modeling (ABM) with Large Language Models (LLMs), enabling agents to generate posts, form opinions, and update follower networks. This simulation allows for more detailed observations of how opinion leaders emerge. Additionally, we present an innovative application of Reinforcement Learning (RL) to replicate the process of opinion leader formation. Our findings reveal that limiting the action space and incorporating self-observation are key factors for achieving stable opinion leader generation. The learning curves demonstrate the model's capacity to identify optimal strategies and adapt to complex, unpredictable dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19635v1-abstract-full').style.display = 'none'; document.getElementById('2411.19635v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15447">arXiv:2411.15447</a> <span> [<a href="https://arxiv.org/pdf/2411.15447">pdf</a>, <a href="https://arxiv.org/format/2411.15447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Gotta Hear Them All: Sound Source Aware Vision to Audio Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Heng Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianbo Ma</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+W">Weidong Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15447v2-abstract-short" style="display: inline;"> Vision-to-audio (V2A) synthesis has broad applications in multimedia. Recent advancements of V2A methods have made it possible to generate relevant audios from inputs of videos or still images. However, the immersiveness and expressiveness of the generation are limited. One possible problem is that existing methods solely rely on the global scene and overlook details of local sounding objects (i.e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15447v2-abstract-full').style.display = 'inline'; document.getElementById('2411.15447v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15447v2-abstract-full" style="display: none;"> Vision-to-audio (V2A) synthesis has broad applications in multimedia. Recent advancements of V2A methods have made it possible to generate relevant audios from inputs of videos or still images. However, the immersiveness and expressiveness of the generation are limited. One possible problem is that existing methods solely rely on the global scene and overlook details of local sounding objects (i.e., sound sources). To address this issue, we propose a Sound Source-Aware V2A (SSV2A) generator. SSV2A is able to locally perceive multimodal sound sources from a scene with visual detection and cross-modality translation. It then contrastively learns a Cross-Modal Sound Source (CMSS) Manifold to semantically disambiguate each source. Finally, we attentively mix their CMSS semantics into a rich audio representation, from which a pretrained audio generator outputs the sound. To model the CMSS manifold, we curate a novel single-sound-source visual-audio dataset VGGS3 from VGGSound. We also design a Sound Source Matching Score to measure localized audio relevance. This is to our knowledge the first work to address V2A generation at the sound-source level. Extensive experiments show that SSV2A surpasses state-of-the-art methods in both generation fidelity and relevance. We further demonstrate SSV2A's ability to achieve intuitive V2A control by compositing vision, text, and audio conditions. Our SSV2A generation can be tried and heard at https://ssv2a.github.io/SSV2A-demo . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15447v2-abstract-full').style.display = 'none'; document.getElementById('2411.15447v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures, source code released at https://github.com/wguo86/SSV2A</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15005">arXiv:2411.15005</a> <span> [<a href="https://arxiv.org/pdf/2411.15005">pdf</a>, <a href="https://arxiv.org/format/2411.15005">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Multi-granularity Interest Retrieval and Refinement Network for Long-Term User Behavior Modeling in CTR Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiang Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luankang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wanshan Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runlong Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15005v4-abstract-short" style="display: inline;"> Click-through Rate (CTR) prediction is crucial for online personalization platforms. Recent advancements have shown that modeling rich user behaviors can significantly improve the performance of CTR prediction. Current long-term user behavior modeling algorithms predominantly follow two cascading stages. The first stage retrieves subsequence related to the target item from the long-term behavior s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15005v4-abstract-full').style.display = 'inline'; document.getElementById('2411.15005v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15005v4-abstract-full" style="display: none;"> Click-through Rate (CTR) prediction is crucial for online personalization platforms. Recent advancements have shown that modeling rich user behaviors can significantly improve the performance of CTR prediction. Current long-term user behavior modeling algorithms predominantly follow two cascading stages. The first stage retrieves subsequence related to the target item from the long-term behavior sequence, while the second stage models the relationship between the subsequence and the target item. Despite significant progress, these methods have two critical flaws. First, the retrieval query typically includes only target item information, limiting the ability to capture the user's diverse interests. Second, relational information, such as sequential and interactive information within the subsequence, is frequently overlooked. Therefore, it requires to be further mined to more accurately model user interests. To this end, we propose Multi-granularity Interest Retrieval and Refinement Network (MIRRN). Specifically, we first construct queries based on behaviors observed at different time scales to obtain subsequences, each capturing users' interest at various granularities. We then introduce an noval multi-head Fourier transformer to efficiently learn sequential and interactive information within the subsequences, leading to more accurate modeling of user interests. Finally, we employ multi-head target attention to adaptively assess the impact of these multi-granularity interests on the target item. Extensive experiments have demonstrated that MIRRN significantly outperforms state-of-the-art baselines. Furthermore, an A/B test shows that MIRRN increases the average number of listening songs by 1.32% and the average time of listening songs by 0.55% on the Huawei Music App. The implementation code is publicly available at https://github.com/USTC-StarTeam/MIRRN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15005v4-abstract-full').style.display = 'none'; document.getElementById('2411.15005v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10948">arXiv:2411.10948</a> <span> [<a href="https://arxiv.org/pdf/2411.10948">pdf</a>, <a href="https://arxiv.org/format/2411.10948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Accurate and Efficient Sub-8-Bit Integer Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenjin Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Donglai Liu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+W">Weiying Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunsong Li</a>, <a href="/search/cs?searchtype=author&query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+Z">Zihan Meng</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+S">Shulin Zeng</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+J">Jie Lei</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zhenman Fang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10948v1-abstract-short" style="display: inline;"> Neural network training is a memory- and compute-intensive task. Quantization, which enables low-bitwidth formats in training, can significantly mitigate the workload. To reduce quantization error, recent methods have developed new data formats and additional pre-processing operations on quantizers. However, it remains quite challenging to achieve high accuracy and efficiency simultaneously. In th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10948v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10948v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10948v1-abstract-full" style="display: none;"> Neural network training is a memory- and compute-intensive task. Quantization, which enables low-bitwidth formats in training, can significantly mitigate the workload. To reduce quantization error, recent methods have developed new data formats and additional pre-processing operations on quantizers. However, it remains quite challenging to achieve high accuracy and efficiency simultaneously. In this paper, we explore sub-8-bit integer training from its essence of gradient descent optimization. Our integer training framework includes two components: ShiftQuant to realize accurate gradient estimation, and L1 normalization to smoothen the loss landscape. ShiftQuant attains performance that approaches the theoretical upper bound of group quantization. Furthermore, it liberates group quantization from inefficient memory rearrangement. The L1 normalization facilitates the implementation of fully quantized normalization layers with impressive convergence accuracy. Our method frees sub-8-bit integer training from pre-processing and supports general devices. This framework achieves negligible accuracy loss across various neural networks and tasks ($0.92\%$ on 4-bit ResNets, $0.61\%$ on 6-bit Transformers). The prototypical implementation of ShiftQuant achieves more than $1.85\times/15.3\%$ performance improvement on CPU/GPU compared to its FP16 counterparts, and $33.9\%$ resource consumption reduction on FPGA than the FP16 counterparts. The proposed fully-quantized L1 normalization layers achieve more than $35.54\%$ improvement in throughout on CPU compared to traditional L2 normalization layers. Moreover, theoretical analysis verifies the advancement of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10948v1-abstract-full').style.display = 'none'; document.getElementById('2411.10948v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08645">arXiv:2411.08645</a> <span> [<a href="https://arxiv.org/pdf/2411.08645">pdf</a>, <a href="https://arxiv.org/format/2411.08645">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> A System Level Performance Evaluation for Superconducting Digital Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kundu%2C+J">Joyjit Kundu</a>, <a href="/search/cs?searchtype=author&query=Bhattacharjee%2C+D">Debjyoti Bhattacharjee</a>, <a href="/search/cs?searchtype=author&query=Josephsen%2C+N">Nathan Josephsen</a>, <a href="/search/cs?searchtype=author&query=Pokhrel%2C+A">Ankit Pokhrel</a>, <a href="/search/cs?searchtype=author&query=De+Silva%2C+U">Udara De Silva</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenzhe Guo</a>, <a href="/search/cs?searchtype=author&query=Van+Winckel%2C+S">Steven Van Winckel</a>, <a href="/search/cs?searchtype=author&query=Brebels%2C+S">Steven Brebels</a>, <a href="/search/cs?searchtype=author&query=Perumkunnil%2C+M">Manu Perumkunnil</a>, <a href="/search/cs?searchtype=author&query=Herr%2C+Q">Quentin Herr</a>, <a href="/search/cs?searchtype=author&query=Herr%2C+A">Anna Herr</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08645v1-abstract-short" style="display: inline;"> Superconducting Digital (SCD) technology offers significant potential for enhancing the performance of next generation large scale compute workloads. By leveraging advanced lithography and a 300 mm platform, SCD devices can reduce energy consumption and boost computational power. This paper presents a cross-layer modeling approach to evaluate the system-level performance benefits of SCD architectu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08645v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08645v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08645v1-abstract-full" style="display: none;"> Superconducting Digital (SCD) technology offers significant potential for enhancing the performance of next generation large scale compute workloads. By leveraging advanced lithography and a 300 mm platform, SCD devices can reduce energy consumption and boost computational power. This paper presents a cross-layer modeling approach to evaluate the system-level performance benefits of SCD architectures for Large Language Model (LLM) training and inference. Our findings, based on experimental data and Pulse Conserving Logic (PCL) design principles, demonstrate substantial performance gain in both training and inference. We are, thus, able to convincingly show that the SCD technology can address memory and interconnect limitations of present day solutions for next-generation compute systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08645v1-abstract-full').style.display = 'none'; document.getElementById('2411.08645v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> DATE 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05681">arXiv:2411.05681</a> <span> [<a href="https://arxiv.org/pdf/2411.05681">pdf</a>, <a href="https://arxiv.org/format/2411.05681">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MITS.2024.3427655">10.1109/MITS.2024.3427655 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Survey of AI-Related Cyber Security Risks and Countermeasures in Mobility-as-a-Service </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chu%2C+K">Kai-Fung Chu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+H">Haiyue Yuan</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinsheng Yuan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a>, <a href="/search/cs?searchtype=author&query=Balta-Ozkan%2C+N">Nazmiye Balta-Ozkan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shujun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05681v1-abstract-short" style="display: inline;"> Mobility-as-a-Service (MaaS) integrates different transport modalities and can support more personalisation of travellers' journey planning based on their individual preferences, behaviours and wishes. To fully achieve the potential of MaaS, a range of AI (including machine learning and data mining) algorithms are needed to learn personal requirements and needs, to optimise journey planning of eac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05681v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05681v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05681v1-abstract-full" style="display: none;"> Mobility-as-a-Service (MaaS) integrates different transport modalities and can support more personalisation of travellers' journey planning based on their individual preferences, behaviours and wishes. To fully achieve the potential of MaaS, a range of AI (including machine learning and data mining) algorithms are needed to learn personal requirements and needs, to optimise journey planning of each traveller and all travellers as a whole, to help transport service operators and relevant governmental bodies to operate and plan their services, and to detect and prevent cyber attacks from various threat actors including dishonest and malicious travellers and transport operators. The increasing use of different AI and data processing algorithms in both centralised and distributed settings opens the MaaS ecosystem up to diverse cyber and privacy attacks at both the AI algorithm level and the connectivity surfaces. In this paper, we present the first comprehensive review on the coupling between AI-driven MaaS design and the diverse cyber security challenges related to cyber attacks and countermeasures. In particular, we focus on how current and emerging AI-facilitated privacy risks (profiling, inference, and third-party threats) and adversarial AI attacks (evasion, extraction, and gamification) may impact the MaaS ecosystem. These risks often combine novel attacks (e.g., inverse learning) with traditional attack vectors (e.g., man-in-the-middle attacks), exacerbating the risks for the wider participation actors and the emergence of new business models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05681v1-abstract-full').style.display = 'none'; document.getElementById('2411.05681v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Intelligent Transportation Systems Magazine (Volume: 16, Issue: 6, Nov.-Dec. 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03569">arXiv:2411.03569</a> <span> [<a href="https://arxiv.org/pdf/2411.03569">pdf</a>, <a href="https://arxiv.org/format/2411.03569">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Personalized Federated Learning via Comprehensive Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengju Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bochao Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weijia Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yong Li</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shiming Ge</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03569v1-abstract-short" style="display: inline;"> Federated learning is a distributed machine learning paradigm designed to protect data privacy. However, data heterogeneity across various clients results in catastrophic forgetting, where the model rapidly forgets previous knowledge while acquiring new knowledge. To address this challenge, personalized federated learning has emerged to customize a personalized model for each client. However, the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03569v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03569v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03569v1-abstract-full" style="display: none;"> Federated learning is a distributed machine learning paradigm designed to protect data privacy. However, data heterogeneity across various clients results in catastrophic forgetting, where the model rapidly forgets previous knowledge while acquiring new knowledge. To address this challenge, personalized federated learning has emerged to customize a personalized model for each client. However, the inherent limitation of this mechanism is its excessive focus on personalization, potentially hindering the generalization of those models. In this paper, we present a novel personalized federated learning method that uses global and historical models as teachers and the local model as the student to facilitate comprehensive knowledge distillation. The historical model represents the local model from the last round of client training, containing historical personalized knowledge, while the global model represents the aggregated model from the last round of server aggregation, containing global generalized knowledge. By applying knowledge distillation, we effectively transfer global generalized knowledge and historical personalized knowledge to the local model, thus mitigating catastrophic forgetting and enhancing the general performance of personalized models. Extensive experimental results demonstrate the significant advantages of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03569v1-abstract-full').style.display = 'none'; document.getElementById('2411.03569v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE SMC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03371">arXiv:2411.03371</a> <span> [<a href="https://arxiv.org/pdf/2411.03371">pdf</a>, <a href="https://arxiv.org/ps/2411.03371">ps</a>, <a href="https://arxiv.org/format/2411.03371">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Blockchain-Based Multi-Path Mobile Access Point Selection for Secure 5G VANETs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiou Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03371v1-abstract-short" style="display: inline;"> This letter presents a blockchain-based multi-path mobile access point (MAP) selection strategy for secure 5G vehicular ad-hoc networks (VANETs). The proposed method leverages blockchain technology for decentralized, transparent, and secure MAP selection, while the multi-path transmission strategy enhances network reliability and reduces communication delays. A trust-based attack detection mechani… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03371v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03371v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03371v1-abstract-full" style="display: none;"> This letter presents a blockchain-based multi-path mobile access point (MAP) selection strategy for secure 5G vehicular ad-hoc networks (VANETs). The proposed method leverages blockchain technology for decentralized, transparent, and secure MAP selection, while the multi-path transmission strategy enhances network reliability and reduces communication delays. A trust-based attack detection mechanism is integrated to ensure network security. Simulation results demonstrate that the proposed algorithm reduces both handover frequency and average communication delay by over 80%, and successfully identifies and excludes more than 95% of Sybil nodes, ensuring reliable and secure communication in highly dynamic vehicular environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03371v1-abstract-full').style.display = 'none'; document.getElementById('2411.03371v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03027">arXiv:2411.03027</a> <span> [<a href="https://arxiv.org/pdf/2411.03027">pdf</a>, <a href="https://arxiv.org/format/2411.03027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Genetic Selection based Pinning Control with Asymmetric Coupling for Multi-Network Heterogeneous Vehicular Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&query=Sha%2C+R">Ruizhi Sha</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lun Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03027v1-abstract-short" style="display: inline;"> To alleviate computational load on RSUs and cloud platforms, reduce communication bandwidth requirements, and provide a more stable vehicular network service, this paper proposes an optimized pinning control approach for heterogeneous multi-network vehicular ad-hoc networks (VANETs). In such networks, vehicles participate in multiple task-specific networks with asymmetric coupling and dynamic topo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03027v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03027v1-abstract-full" style="display: none;"> To alleviate computational load on RSUs and cloud platforms, reduce communication bandwidth requirements, and provide a more stable vehicular network service, this paper proposes an optimized pinning control approach for heterogeneous multi-network vehicular ad-hoc networks (VANETs). In such networks, vehicles participate in multiple task-specific networks with asymmetric coupling and dynamic topologies. We first establish a rigorous theoretical foundation by proving the stability of pinning control strategies under both single and multi-network conditions, deriving sufficient stability conditions using Lyapunov theory and linear matrix inequalities (LMIs). Building on this theoretical groundwork, we propose an adaptive genetic algorithm tailored to select optimal pinning nodes, effectively balancing LMI constraints while prioritizing overlapping nodes to enhance control efficiency. Extensive simulations across various network scales demonstrate that our approach achieves rapid consensus with a reduced number of control nodes, particularly when leveraging network overlaps. This work provides a comprehensive solution for efficient control node selection in complex vehicular networks, offering practical implications for deploying large-scale intelligent transportation systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03027v1-abstract-full').style.display = 'none'; document.getElementById('2411.03027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02057">arXiv:2411.02057</a> <span> [<a href="https://arxiv.org/pdf/2411.02057">pdf</a>, <a href="https://arxiv.org/format/2411.02057">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Unlabeled Data with Multiple Expert Teachers for Open Vocabulary Aerial Object Detection and Its Orientation Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yan Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weiwei Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xue Yang</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+N">Ning Liao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaofeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yi Yu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wenxian Yu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Junchi Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02057v1-abstract-short" style="display: inline;"> In recent years, aerial object detection has been increasingly pivotal in various earth observation applications. However, current algorithms are limited to detecting a set of pre-defined object categories, demanding sufficient annotated training samples, and fail to detect novel object categories. In this paper, we put forth a novel formulation of the aerial object detection problem, namely open-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02057v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02057v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02057v1-abstract-full" style="display: none;"> In recent years, aerial object detection has been increasingly pivotal in various earth observation applications. However, current algorithms are limited to detecting a set of pre-defined object categories, demanding sufficient annotated training samples, and fail to detect novel object categories. In this paper, we put forth a novel formulation of the aerial object detection problem, namely open-vocabulary aerial object detection (OVAD), which can detect objects beyond training categories without costly collecting new labeled data. We propose CastDet, a CLIP-activated student-teacher detection framework that serves as the first OVAD detector specifically designed for the challenging aerial scenario, where objects often exhibit weak appearance features and arbitrary orientations. Our framework integrates a robust localization teacher along with several box selection strategies to generate high-quality proposals for novel objects. Additionally, the RemoteCLIP model is adopted as an omniscient teacher, which provides rich knowledge to enhance classification capabilities for novel categories. A dynamic label queue is devised to maintain high-quality pseudo-labels during training. By doing so, the proposed CastDet boosts not only novel object proposals but also classification. Furthermore, we extend our approach from horizontal OVAD to oriented OVAD with tailored algorithm designs to effectively manage bounding box representation and pseudo-label generation. Extensive experiments for both tasks on multiple existing aerial object detection datasets demonstrate the effectiveness of our approach. The code is available at https://github.com/lizzy8587/CastDet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02057v1-abstract-full').style.display = 'none'; document.getElementById('2411.02057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22225">arXiv:2410.22225</a> <span> [<a href="https://arxiv.org/pdf/2410.22225">pdf</a>, <a href="https://arxiv.org/format/2410.22225">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> CaStL: Constraints as Specifications through LLM Translation for Long-Horizon Task and Motion Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weihang Guo</a>, <a href="/search/cs?searchtype=author&query=Kingston%2C+Z">Zachary Kingston</a>, <a href="/search/cs?searchtype=author&query=Kavraki%2C+L+E">Lydia E. Kavraki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22225v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated remarkable ability in long-horizon Task and Motion Planning (TAMP) by translating clear and straightforward natural language problems into formal specifications such as the Planning Domain Definition Language (PDDL). However, real-world problems are often ambiguous and involve many complex constraints. In this paper, we introduce Constraints as Specif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22225v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22225v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22225v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated remarkable ability in long-horizon Task and Motion Planning (TAMP) by translating clear and straightforward natural language problems into formal specifications such as the Planning Domain Definition Language (PDDL). However, real-world problems are often ambiguous and involve many complex constraints. In this paper, we introduce Constraints as Specifications through LLMs (CaStL), a framework that identifies constraints such as goal conditions, action ordering, and action blocking from natural language in multiple stages. CaStL translates these constraints into PDDL and Python scripts, which are solved using an custom PDDL solver. Tested across three PDDL domains, CaStL significantly improves constraint handling and planning success rates from natural language specification in complex scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22225v1-abstract-full').style.display = 'none'; document.getElementById('2410.22225v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21487">arXiv:2410.21487</a> <span> [<a href="https://arxiv.org/pdf/2410.21487">pdf</a>, <a href="https://arxiv.org/format/2410.21487">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3627673.3679849">10.1145/3627673.3679849 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Enhancing CTR Prediction in Recommendation Domain with Search Query Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuening Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Man Chen</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yaochen Hu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huifeng Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Coates%2C+M">Mark Coates</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21487v1-abstract-short" style="display: inline;"> Many platforms, such as e-commerce websites, offer both search and recommendation services simultaneously to better meet users' diverse needs. Recommendation services suggest items based on user preferences, while search services allow users to search for items before providing recommendations. Since users and items are often shared between the search and recommendation domains, there is a valuabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21487v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21487v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21487v1-abstract-full" style="display: none;"> Many platforms, such as e-commerce websites, offer both search and recommendation services simultaneously to better meet users' diverse needs. Recommendation services suggest items based on user preferences, while search services allow users to search for items before providing recommendations. Since users and items are often shared between the search and recommendation domains, there is a valuable opportunity to enhance the recommendation domain by leveraging user preferences extracted from the search domain. Existing approaches either overlook the shift in user intention between these domains or fail to capture the significant impact of learning from users' search queries on understanding their interests. In this paper, we propose a framework that learns from user search query embeddings within the context of user preferences in the recommendation domain. Specifically, user search query sequences from the search domain are used to predict the items users will click at the next time point in the recommendation domain. Additionally, the relationship between queries and items is explored through contrastive learning. To address issues of data sparsity, the diffusion model is incorporated to infer positive items the user will select after searching with certain queries in a denoising manner, which is particularly effective in preventing false positives. Effectively extracting this information, the queries are integrated into click-through rate prediction in the recommendation domain. Experimental analysis demonstrates that our model outperforms state-of-the-art models in the recommendation domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21487v1-abstract-full').style.display = 'none'; document.getElementById('2410.21487v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CIKM 2024 Full Research Track</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> CIKM (2024) 2462-2471 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20154">arXiv:2410.20154</a> <span> [<a href="https://arxiv.org/pdf/2410.20154">pdf</a>, <a href="https://arxiv.org/format/2410.20154">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Detection-Guided Deep Learning-Based Model with Spatial Regularization for Lung Nodule Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiasen Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Mingrui Yang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weihong Guo</a>, <a href="/search/cs?searchtype=author&query=Xavier%2C+B+A">Brian A. Xavier</a>, <a href="/search/cs?searchtype=author&query=Bolen%2C+M">Michael Bolen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaojuan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20154v1-abstract-short" style="display: inline;"> Lung cancer ranks as one of the leading causes of cancer diagnosis and is the foremost cause of cancer-related mortality worldwide. The early detection of lung nodules plays a pivotal role in improving outcomes for patients, as it enables timely and effective treatment interventions. The segmentation of lung nodules plays a critical role in aiding physicians in distinguishing between malignant and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20154v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20154v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20154v1-abstract-full" style="display: none;"> Lung cancer ranks as one of the leading causes of cancer diagnosis and is the foremost cause of cancer-related mortality worldwide. The early detection of lung nodules plays a pivotal role in improving outcomes for patients, as it enables timely and effective treatment interventions. The segmentation of lung nodules plays a critical role in aiding physicians in distinguishing between malignant and benign lesions. However, this task remains challenging due to the substantial variation in the shapes and sizes of lung nodules, and their frequent proximity to lung tissues, which complicates clear delineation. In this study, we introduce a novel model for segmenting lung nodules in computed tomography (CT) images, leveraging a deep learning framework that integrates segmentation and classification processes. This model is distinguished by its use of feature combination blocks, which facilitate the sharing of information between the segmentation and classification components. Additionally, we employ the classification outcomes as priors to refine the size estimation of the predicted nodules, integrating these with a spatial regularization technique to enhance precision. Furthermore, recognizing the challenges posed by limited training datasets, we have developed an optimal transfer learning strategy that freezes certain layers to further improve performance. The results show that our proposed model can capture the target nodules more accurately compared to other commonly used models. By applying transfer learning, the performance can be further improved, achieving a sensitivity score of 0.885 and a Dice score of 0.814. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20154v1-abstract-full').style.display = 'none'; document.getElementById('2410.20154v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18610">arXiv:2410.18610</a> <span> [<a href="https://arxiv.org/pdf/2410.18610">pdf</a>, <a href="https://arxiv.org/format/2410.18610">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Joint Representation Using Continuous and Discrete Features for Cardiovascular Diseases Risk Prediction on Chest CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+M">Minfeng Xu</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+C">Chen-Chen Fan</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yan-Jie Zhou</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenchao Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pan Liu</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+J">Jing Qi</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Chao%2C+H">Hanqing Chao</a>, <a href="/search/cs?searchtype=author&query=He%2C+K">Kunlun He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18610v2-abstract-short" style="display: inline;"> Cardiovascular diseases (CVD) remain a leading health concern and contribute significantly to global mortality rates. While clinical advancements have led to a decline in CVD mortality, accurately identifying individuals who could benefit from preventive interventions remains an unsolved challenge in preventive cardiology. Current CVD risk prediction models, recommended by guidelines, are based on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18610v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18610v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18610v2-abstract-full" style="display: none;"> Cardiovascular diseases (CVD) remain a leading health concern and contribute significantly to global mortality rates. While clinical advancements have led to a decline in CVD mortality, accurately identifying individuals who could benefit from preventive interventions remains an unsolved challenge in preventive cardiology. Current CVD risk prediction models, recommended by guidelines, are based on limited traditional risk factors or use CT imaging to acquire quantitative biomarkers, and still have limitations in predictive accuracy and applicability. On the other hand, end-to-end trained CVD risk prediction methods leveraging deep learning on CT images often fail to provide transparent and explainable decision grounds for assisting physicians. In this work, we proposed a novel joint representation that integrates discrete quantitative biomarkers and continuous deep features extracted from chest CT scans. Our approach initiated with a deep CVD risk classification model by capturing comprehensive continuous deep learning features while jointly obtaining currently clinical-established quantitative biomarkers via segmentation models. In the feature joint representation stage, we use an instance-wise feature-gated mechanism to align the continuous and discrete features, followed by a soft instance-wise feature interaction mechanism fostering independent and effective feature interaction for the final CVD risk prediction. Our method substantially improves CVD risk predictive performance and offers individual contribution analysis of each biomarker, which is important in assisting physicians' decision-making processes. We validated our method on a public chest low-dose CT dataset and a private external chest standard-dose CT patient cohort of 17,207 CT volumes from 6,393 unique subjects, and demonstrated superior predictive performance, achieving AUCs of 0.875 and 0.843, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18610v2-abstract-full').style.display = 'none'; document.getElementById('2410.18610v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14616">arXiv:2410.14616</a> <span> [<a href="https://arxiv.org/pdf/2410.14616">pdf</a>, <a href="https://arxiv.org/format/2410.14616">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Deep Reinforcement Learning for Navigation in Denied Sensor Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wisniewski%2C+M">Mariusz Wisniewski</a>, <a href="/search/cs?searchtype=author&query=Chatzithanos%2C+P">Paraskevas Chatzithanos</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a>, <a href="/search/cs?searchtype=author&query=Tsourdos%2C+A">Antonios Tsourdos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14616v1-abstract-short" style="display: inline;"> Deep Reinforcement learning (DRL) is used to enable autonomous navigation in unknown environments. Most research assume perfect sensor data, but real-world environments may contain natural and artificial sensor noise and denial. Here, we present a benchmark of both well-used and emerging DRL algorithms in a navigation task with configurable sensor denial effects. In particular, we are interested i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14616v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14616v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14616v1-abstract-full" style="display: none;"> Deep Reinforcement learning (DRL) is used to enable autonomous navigation in unknown environments. Most research assume perfect sensor data, but real-world environments may contain natural and artificial sensor noise and denial. Here, we present a benchmark of both well-used and emerging DRL algorithms in a navigation task with configurable sensor denial effects. In particular, we are interested in comparing how different DRL methods (e.g. model-free PPO vs. model-based DreamerV3) are affected by sensor denial. We show that DreamerV3 outperforms other methods in the visual end-to-end navigation task with a dynamic goal - and other methods are not able to learn this. Furthermore, DreamerV3 generally outperforms other methods in sensor-denied environments. In order to improve robustness, we use adversarial training and demonstrate an improved performance in denied environments, although this generally comes with a performance cost on the vanilla environments. We anticipate this benchmark of different DRL methods and the usage of adversarial training to be a starting point for the development of more elaborate navigation strategies that are capable of dealing with uncertain and denied sensor readings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14616v1-abstract-full').style.display = 'none'; document.getElementById('2410.14616v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 19 figures. For associated code, see https://github.com/mazqtpopx/cranfield-navigation-gym</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.9 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11559">arXiv:2410.11559</a> <span> [<a href="https://arxiv.org/pdf/2410.11559">pdf</a>, <a href="https://arxiv.org/format/2410.11559">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Why Go Full? Elevating Federated Learning Through Partial Network Updates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haolin Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuefeng Liu</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+J">Jianwei Niu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenkai Guo</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shaojie Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11559v3-abstract-short" style="display: inline;"> Federated learning is a distributed machine learning paradigm designed to protect user data privacy, which has been successfully implemented across various scenarios. In traditional federated learning, the entire parameter set of local models is updated and averaged in each training round. Although this full network update method maximizes knowledge acquisition and sharing for each model layer, it… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11559v3-abstract-full').style.display = 'inline'; document.getElementById('2410.11559v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11559v3-abstract-full" style="display: none;"> Federated learning is a distributed machine learning paradigm designed to protect user data privacy, which has been successfully implemented across various scenarios. In traditional federated learning, the entire parameter set of local models is updated and averaged in each training round. Although this full network update method maximizes knowledge acquisition and sharing for each model layer, it prevents the layers of the global model from cooperating effectively to complete the tasks of each client, a challenge we refer to as layer mismatch. This mismatch problem recurs after every parameter averaging, consequently slowing down model convergence and degrading overall performance. To address the layer mismatch issue, we introduce the FedPart method, which restricts model updates to either a single layer or a few layers during each communication round. Furthermore, to maintain the efficiency of knowledge acquisition and sharing, we develop several strategies to select trainable layers in each round, including sequential updating and multi-round cycle training. Through both theoretical analysis and experiments, our findings demonstrate that the FedPart method significantly surpasses conventional full network update strategies in terms of convergence speed and accuracy, while also reducing communication and computational overheads. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11559v3-abstract-full').style.display = 'none'; document.getElementById('2410.11559v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 8 figures, accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11096">arXiv:2410.11096</a> <span> [<a href="https://arxiv.org/pdf/2410.11096">pdf</a>, <a href="https://arxiv.org/format/2410.11096">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SecCodePLT: A Unified Platform for Evaluating the Security of Code GenAI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yu Yang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+Y">Yuzhou Nie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhun Wang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yuheng Tang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11096v1-abstract-short" style="display: inline;"> Existing works have established multiple benchmarks to highlight the security risks associated with Code GenAI. These risks are primarily reflected in two areas: a model potential to generate insecure code (insecure coding) and its utility in cyberattacks (cyberattack helpfulness). While these benchmarks have made significant strides, there remain opportunities for further improvement. For instanc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11096v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11096v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11096v1-abstract-full" style="display: none;"> Existing works have established multiple benchmarks to highlight the security risks associated with Code GenAI. These risks are primarily reflected in two areas: a model potential to generate insecure code (insecure coding) and its utility in cyberattacks (cyberattack helpfulness). While these benchmarks have made significant strides, there remain opportunities for further improvement. For instance, many current benchmarks tend to focus more on a model ability to provide attack suggestions rather than its capacity to generate executable attacks. Additionally, most benchmarks rely heavily on static evaluation metrics, which may not be as precise as dynamic metrics such as passing test cases. Conversely, expert-verified benchmarks, while offering high-quality data, often operate at a smaller scale. To address these gaps, we develop SecCodePLT, a unified and comprehensive evaluation platform for code GenAIs' risks. For insecure code, we introduce a new methodology for data creation that combines experts with automatic generation. Our methodology ensures the data quality while enabling large-scale generation. We also associate samples with test cases to conduct code-related dynamic evaluation. For cyberattack helpfulness, we set up a real environment and construct samples to prompt a model to generate actual attacks, along with dynamic metrics in our environment. We conduct extensive experiments and show that SecCodePLT outperforms the state-of-the-art (SOTA) benchmark CyberSecEval in security relevance. Furthermore, it better identifies the security risks of SOTA models in insecure coding and cyberattack helpfulness. Finally, we apply SecCodePLT to the SOTA code agent, Cursor, and, for the first time, identify non-trivial security risks in this advanced coding agent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11096v1-abstract-full').style.display = 'none'; document.getElementById('2410.11096v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10184">arXiv:2410.10184</a> <span> [<a href="https://arxiv.org/pdf/2410.10184">pdf</a>, <a href="https://arxiv.org/format/2410.10184">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICME57554.2024.10688155">10.1109/ICME57554.2024.10688155 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Eliminating the Language Bias for Visual Question Answering with fine-grained Causal Intervention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Ying Liu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+G">Ge Bai</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chenji Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shilong Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Ruifang Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbin Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10184v1-abstract-short" style="display: inline;"> Despite the remarkable advancements in Visual Question Answering (VQA), the challenge of mitigating the language bias introduced by textual information remains unresolved. Previous approaches capture language bias from a coarse-grained perspective. However, the finer-grained information within a sentence, such as context and keywords, can result in different biases. Due to the ignorance of fine-gr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10184v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10184v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10184v1-abstract-full" style="display: none;"> Despite the remarkable advancements in Visual Question Answering (VQA), the challenge of mitigating the language bias introduced by textual information remains unresolved. Previous approaches capture language bias from a coarse-grained perspective. However, the finer-grained information within a sentence, such as context and keywords, can result in different biases. Due to the ignorance of fine-grained information, most existing methods fail to sufficiently capture language bias. In this paper, we propose a novel causal intervention training scheme named CIBi to eliminate language bias from a finer-grained perspective. Specifically, we divide the language bias into context bias and keyword bias. We employ causal intervention and contrastive learning to eliminate context bias and improve the multi-modal representation. Additionally, we design a new question-only branch based on counterfactual generation to distill and eliminate keyword bias. Experimental results illustrate that CIBi is applicable to various VQA models, yielding competitive performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10184v1-abstract-full').style.display = 'none'; document.getElementById('2410.10184v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2024 IEEE International Conference on Multimedia and Expo (ICME), Niagara Falls, ON, Canada, 2024, pp. 1-6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04039">arXiv:2410.04039</a> <span> [<a href="https://arxiv.org/pdf/2410.04039">pdf</a>, <a href="https://arxiv.org/format/2410.04039">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> BlockFound: Customized blockchain foundation model for anomaly detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiahao Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xian Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hao Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+X">Xinyu Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04039v3-abstract-short" style="display: inline;"> We propose BlockFound, a customized foundation model for anomaly blockchain transaction detection. Unlike existing methods that rely on rule-based systems or directly apply off-the-shelf large language models, BlockFound introduces a series of customized designs to model the unique data structure of blockchain transactions. First, a blockchain transaction is multi-modal, containing blockchain-spec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04039v3-abstract-full').style.display = 'inline'; document.getElementById('2410.04039v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04039v3-abstract-full" style="display: none;"> We propose BlockFound, a customized foundation model for anomaly blockchain transaction detection. Unlike existing methods that rely on rule-based systems or directly apply off-the-shelf large language models, BlockFound introduces a series of customized designs to model the unique data structure of blockchain transactions. First, a blockchain transaction is multi-modal, containing blockchain-specific tokens, texts, and numbers. We design a modularized tokenizer to handle these multi-modal inputs, balancing the information across different modalities. Second, we design a customized mask language learning mechanism for pretraining with RoPE embedding and FlashAttention for handling longer sequences. After training the foundation model, we further design a novel detection method for anomaly detection. Extensive evaluations on Ethereum and Solana transactions demonstrate BlockFound's exceptional capability in anomaly detection while maintaining a low false positive rate. Remarkably, BlockFound is the only method that successfully detects anomalous transactions on Solana with high accuracy, whereas all other approaches achieved very low or zero detection recall scores. This work not only provides new foundation models for blockchain but also sets a new benchmark for applying LLMs in blockchain data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04039v3-abstract-full').style.display = 'none'; document.getElementById('2410.04039v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02970">arXiv:2410.02970</a> <span> [<a href="https://arxiv.org/pdf/2410.02970">pdf</a>, <a href="https://arxiv.org/format/2410.02970">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> F-Fidelity: A Robust Framework for Faithfulness Evaluation of Explainable AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xu Zheng</a>, <a href="/search/cs?searchtype=author&query=Shirani%2C+F">Farhad Shirani</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhuomin Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chaohao Lin</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wei Cheng</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dongsheng Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02970v1-abstract-short" style="display: inline;"> Recent research has developed a number of eXplainable AI (XAI) techniques. Although extracting meaningful insights from deep learning models, how to properly evaluate these XAI methods remains an open problem. The most widely used approach is to perturb or even remove what the XAI method considers to be the most important features in an input and observe the changes in the output prediction. This… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02970v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02970v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02970v1-abstract-full" style="display: none;"> Recent research has developed a number of eXplainable AI (XAI) techniques. Although extracting meaningful insights from deep learning models, how to properly evaluate these XAI methods remains an open problem. The most widely used approach is to perturb or even remove what the XAI method considers to be the most important features in an input and observe the changes in the output prediction. This approach although efficient suffers the Out-of-Distribution (OOD) problem as the perturbed samples may no longer follow the original data distribution. A recent method RemOve And Retrain (ROAR) solves the OOD issue by retraining the model with perturbed samples guided by explanations. However, the training may not always converge given the distribution difference. Furthermore, using the model retrained based on XAI methods to evaluate these explainers may cause information leakage and thus lead to unfair comparisons. We propose Fine-tuned Fidelity F-Fidelity, a robust evaluation framework for XAI, which utilizes i) an explanation-agnostic fine-tuning strategy, thus mitigating the information leakage issue and ii) a random masking operation that ensures that the removal step does not generate an OOD input. We designed controlled experiments with state-of-the-art (SOTA) explainers and their degraded version to verify the correctness of our framework. We conducted experiments on multiple data structures, such as images, time series, and natural language. The results demonstrate that F-Fidelity significantly improves upon prior evaluation metrics in recovering the ground-truth ranking of the explainers. Furthermore, we show both theoretically and empirically that, given a faithful explainer, F-Fidelity metric can be used to compute the sparsity of influential input components, i.e., to extract the true explanation size. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02970v1-abstract-full').style.display = 'none'; document.getElementById('2410.02970v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint; 26 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02795">arXiv:2410.02795</a> <span> [<a href="https://arxiv.org/pdf/2410.02795">pdf</a>, <a href="https://arxiv.org/format/2410.02795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TaCIE: Enhancing Instruction Comprehension in Large Language Models through Task-Centred Instruction Evolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiuding Yang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Shengyao Lu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weidong Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangyang Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+K">Kaitong Yang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yu Xu</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+D">Di Niu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02795v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) require precise alignment with complex instructions to optimize their performance in real-world applications. As the demand for refined instruction tuning data increases, traditional methods that evolve simple seed instructions often struggle to effectively enhance complexity or manage difficulty scaling across various domains. Our innovative approach, Task-Centered In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02795v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02795v1-abstract-full" style="display: none;"> Large Language Models (LLMs) require precise alignment with complex instructions to optimize their performance in real-world applications. As the demand for refined instruction tuning data increases, traditional methods that evolve simple seed instructions often struggle to effectively enhance complexity or manage difficulty scaling across various domains. Our innovative approach, Task-Centered Instruction Evolution (TaCIE), addresses these shortcomings by redefining instruction evolution from merely evolving seed instructions to a more dynamic and comprehensive combination of elements. TaCIE starts by deconstructing complex instructions into their fundamental components. It then generates and integrates new elements with the original ones, reassembling them into more sophisticated instructions that progressively increase in difficulty, diversity, and complexity. Applied across multiple domains, LLMs fine-tuned with these evolved instructions have substantially outperformed those tuned with conventional methods, marking a significant advancement in instruction-based model fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02795v1-abstract-full').style.display = 'none'; document.getElementById('2410.02795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02143">arXiv:2410.02143</a> <span> [<a href="https://arxiv.org/pdf/2410.02143">pdf</a>, <a href="https://arxiv.org/format/2410.02143">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Plug-and-Play Controllable Generation for Discrete Masked Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuchen Zhu</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+M">Molei Tao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yongxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02143v1-abstract-short" style="display: inline;"> This article makes discrete masked models for the generative modeling of discrete data controllable. The goal is to generate samples of a discrete random variable that adheres to a posterior distribution, satisfies specific constraints, or optimizes a reward function. This methodological development enables broad applications across downstream tasks such as class-specific image generation and prot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02143v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02143v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02143v1-abstract-full" style="display: none;"> This article makes discrete masked models for the generative modeling of discrete data controllable. The goal is to generate samples of a discrete random variable that adheres to a posterior distribution, satisfies specific constraints, or optimizes a reward function. This methodological development enables broad applications across downstream tasks such as class-specific image generation and protein design. Existing approaches for controllable generation of masked models typically rely on task-specific fine-tuning or additional modifications, which can be inefficient and resource-intensive. To overcome these limitations, we propose a novel plug-and-play framework based on importance sampling that bypasses the need for training a conditional score. Our framework is agnostic to the choice of control criteria, requires no gradient information, and is well-suited for tasks such as posterior sampling, Bayesian inverse problems, and constrained generation. We demonstrate the effectiveness of our approach through extensive experiments, showcasing its versatility across multiple domains, including protein design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02143v1-abstract-full').style.display = 'none'; document.getElementById('2410.02143v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15049">arXiv:2409.15049</a> <span> [<a href="https://arxiv.org/pdf/2409.15049">pdf</a>, <a href="https://arxiv.org/format/2409.15049">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> PackageIntel: Leveraging Large Language Models for Automated Intelligence Extraction in Package Ecosystems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenbo Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengwei Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Limin Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiahui Wu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhengzi Xu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Cheng Huang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yong Fang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15049v2-abstract-short" style="display: inline;"> The rise of malicious packages in public registries poses a significant threat to software supply chain (SSC) security. Although academia and industry employ methods like software composition analysis (SCA) to address this issue, existing approaches often lack timely and comprehensive intelligence updates. This paper introduces PackageIntel, a novel platform that revolutionizes the collection, pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15049v2-abstract-full').style.display = 'inline'; document.getElementById('2409.15049v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15049v2-abstract-full" style="display: none;"> The rise of malicious packages in public registries poses a significant threat to software supply chain (SSC) security. Although academia and industry employ methods like software composition analysis (SCA) to address this issue, existing approaches often lack timely and comprehensive intelligence updates. This paper introduces PackageIntel, a novel platform that revolutionizes the collection, processing, and retrieval of malicious package intelligence. By utilizing exhaustive search techniques, snowball sampling from diverse sources, and large language models (LLMs) with specialized prompts, PackageIntel ensures enhanced coverage, timeliness, and accuracy. We have developed a comprehensive database containing 20,692 malicious NPM and PyPI packages sourced from 21 distinct intelligence repositories. Empirical evaluations demonstrate that PackageIntel achieves a precision of 98.6% and an F1 score of 92.0 in intelligence extraction. Additionally, it detects threats on average 70% earlier than leading databases like Snyk and OSV, and operates cost-effectively at $0.094 per intelligence piece. The platform has successfully identified and reported over 1,000 malicious packages in downstream package manager mirror registries. This research provides a robust, efficient, and timely solution for identifying and mitigating threats within the software supply chain ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15049v2-abstract-full').style.display = 'none'; document.getElementById('2409.15049v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13832">arXiv:2409.13832</a> <span> [<a href="https://arxiv.org/pdf/2409.13832">pdf</a>, <a href="https://arxiv.org/format/2409.13832">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> GTSinger: A Global Multi-Technique Singing Corpus with Realistic Music Scores for All Singing Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+C">Changhao Pan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenxiang Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruiqi Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zhiyuan Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jialei Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Wenhao Xu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Z">Zhiqing Hong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chuxin Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">LiChao Zhang</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jinzheng He</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Ziyue Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxin Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chen Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jiecheng Zhou</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinyu Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13832v5-abstract-short" style="display: inline;"> The scarcity of high-quality and multi-task singing datasets significantly hinders the development of diverse controllable and personalized singing tasks, as existing singing datasets suffer from low quality, limited diversity of languages and singers, absence of multi-technique information and realistic music scores, and poor task suitability. To tackle these problems, we present GTSinger, a larg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13832v5-abstract-full').style.display = 'inline'; document.getElementById('2409.13832v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13832v5-abstract-full" style="display: none;"> The scarcity of high-quality and multi-task singing datasets significantly hinders the development of diverse controllable and personalized singing tasks, as existing singing datasets suffer from low quality, limited diversity of languages and singers, absence of multi-technique information and realistic music scores, and poor task suitability. To tackle these problems, we present GTSinger, a large global, multi-technique, free-to-use, high-quality singing corpus with realistic music scores, designed for all singing tasks, along with its benchmarks. Particularly, (1) we collect 80.59 hours of high-quality singing voices, forming the largest recorded singing dataset; (2) 20 professional singers across nine widely spoken languages offer diverse timbres and styles; (3) we provide controlled comparison and phoneme-level annotations of six commonly used singing techniques, helping technique modeling and control; (4) GTSinger offers realistic music scores, assisting real-world musical composition; (5) singing voices are accompanied by manual phoneme-to-audio alignments, global style labels, and 16.16 hours of paired speech for various singing tasks. Moreover, to facilitate the use of GTSinger, we conduct four benchmark experiments: technique-controllable singing voice synthesis, technique recognition, style transfer, and speech-to-singing conversion. The corpus and demos can be found at http://aaronz345.github.io/GTSingerDemo/. We provide the dataset and the code for processing data and conducting benchmarks at https://huggingface.co/datasets/GTSinger/GTSinger and https://github.com/AaronZ345/GTSinger. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13832v5-abstract-full').style.display = 'none'; document.getElementById('2409.13832v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024 (Spotlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13445">arXiv:2409.13445</a> <span> [<a href="https://arxiv.org/pdf/2409.13445">pdf</a>, <a href="https://arxiv.org/format/2409.13445">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Selective Exploration and Information Gathering in Search and Rescue Using Hierarchical Learning Guided by Natural Language Input </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Panagopoulos%2C+D">Dimitrios Panagopoulos</a>, <a href="/search/cs?searchtype=author&query=Perrusquia%2C+A">Adolfo Perrusquia</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13445v1-abstract-short" style="display: inline;"> In recent years, robots and autonomous systems have become increasingly integral to our daily lives, offering solutions to complex problems across various domains. Their application in search and rescue (SAR) operations, however, presents unique challenges. Comprehensively exploring the disaster-stricken area is often infeasible due to the vastness of the terrain, transformed environment, and the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13445v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13445v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13445v1-abstract-full" style="display: none;"> In recent years, robots and autonomous systems have become increasingly integral to our daily lives, offering solutions to complex problems across various domains. Their application in search and rescue (SAR) operations, however, presents unique challenges. Comprehensively exploring the disaster-stricken area is often infeasible due to the vastness of the terrain, transformed environment, and the time constraints involved. Traditional robotic systems typically operate on predefined search patterns and lack the ability to incorporate and exploit ground truths provided by human stakeholders, which can be the key to speeding up the learning process and enhancing triage. Addressing this gap, we introduce a system that integrates social interaction via large language models (LLMs) with a hierarchical reinforcement learning (HRL) framework. The proposed system is designed to translate verbal inputs from human stakeholders into actionable RL insights and adjust its search strategy. By leveraging human-provided information through LLMs and structuring task execution through HRL, our approach not only bridges the gap between autonomous capabilities and human intelligence but also significantly improves the agent's learning efficiency and decision-making process in environments characterised by long horizons and sparse rewards. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13445v1-abstract-full').style.display = 'none'; document.getElementById('2409.13445v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Pre-print version of the accepted paper to appear in IEEE International Conference on Systems, Man and Cybernetics (SMC) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13423">arXiv:2409.13423</a> <span> [<a href="https://arxiv.org/pdf/2409.13423">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Causal Reinforcement Learning for Optimisation of Robot Dynamics in Unknown Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dcruz%2C+J+G">Julian Gerald Dcruz</a>, <a href="/search/cs?searchtype=author&query=Mahoney%2C+S">Sam Mahoney</a>, <a href="/search/cs?searchtype=author&query=Chua%2C+J+Y">Jia Yun Chua</a>, <a href="/search/cs?searchtype=author&query=Soukhabandith%2C+A">Adoundeth Soukhabandith</a>, <a href="/search/cs?searchtype=author&query=Mugabe%2C+J">John Mugabe</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weisi Guo</a>, <a href="/search/cs?searchtype=author&query=Arana-Catania%2C+M">Miguel Arana-Catania</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13423v1-abstract-short" style="display: inline;"> Autonomous operations of robots in unknown environments are challenging due to the lack of knowledge of the dynamics of the interactions, such as the objects' movability. This work introduces a novel Causal Reinforcement Learning approach to enhancing robotics operations and applies it to an urban search and rescue (SAR) scenario. Our proposed machine learning architecture enables robots to learn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13423v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13423v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13423v1-abstract-full" style="display: none;"> Autonomous operations of robots in unknown environments are challenging due to the lack of knowledge of the dynamics of the interactions, such as the objects' movability. This work introduces a novel Causal Reinforcement Learning approach to enhancing robotics operations and applies it to an urban search and rescue (SAR) scenario. Our proposed machine learning architecture enables robots to learn the causal relationships between the visual characteristics of the objects, such as texture and shape, and the objects' dynamics upon interaction, such as their movability, significantly improving their decision-making processes. We conducted causal discovery and RL experiments demonstrating the Causal RL's superior performance, showing a notable reduction in learning times by over 24.5% in complex situations, compared to non-causal models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13423v1-abstract-full').style.display = 'none'; document.getElementById('2409.13423v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 12 figures, 3 tables. To be presented in 10th IEEE International Smart Cities Conference (ISC2-2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13388">arXiv:2409.13388</a> <span> [<a href="https://arxiv.org/pdf/2409.13388">pdf</a>, <a href="https://arxiv.org/format/2409.13388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Scalable Multi-Objective Optimization for Robust Traffic Signal Control in Uncertain Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wuzhao Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiou Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lun Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13388v1-abstract-short" style="display: inline;"> Intelligent traffic signal control is essential to modern urban management, with important impacts on economic efficiency, environmental sustainability, and quality of daily life. However, in current decades, it continues to pose significant challenges in managing large-scale traffic networks, coordinating intersections, and ensuring robustness under uncertain traffic conditions. This paper presen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13388v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13388v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13388v1-abstract-full" style="display: none;"> Intelligent traffic signal control is essential to modern urban management, with important impacts on economic efficiency, environmental sustainability, and quality of daily life. However, in current decades, it continues to pose significant challenges in managing large-scale traffic networks, coordinating intersections, and ensuring robustness under uncertain traffic conditions. This paper presents a scalable multi-objective optimization approach for robust traffic signal control in dynamic and uncertain urban environments. A multi-objective optimization model is proposed in this paper, which incorporates stochastic variables and probabilistic traffic patterns to capture traffic flow dynamics and uncertainty. We propose an algorithm named Adaptive Hybrid Multi-Objective Optimization Algorithm (AHMOA), which addresses the uncertainties of city traffic, including network-wide signal coordination, fluctuating patterns, and environmental impacts. AHMOA simultaneously optimizes multiple objectives, such as average delay, network stability, and system robustness, while adapting to unpredictable changes in traffic. The algorithm combines evolutionary strategies with an adaptive mechanism to balance exploration and exploitation, and incorporates a memory-based evaluation mechanism to leverage historical traffic data. Simulations are conducted in different cities including Manhattan, Paris, Sao Paulo, and Istanbul. The experimental results demonstrate that AHMOA consistently outperforms several state-of-the-art algorithms and the algorithm is competent to provide scalable, robust Pareto optimal solutions for managing complex traffic systems under uncertain environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13388v1-abstract-full').style.display = 'none'; document.getElementById('2409.13388v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 6 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Guo%2C+W&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Guo%2C+W&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>