CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 86 results for author: <span class="mathjax">Xing, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Xing%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Xing, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xing%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xing, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xing%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xing%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xing%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12802">arXiv:2502.12802</a> <span> [<a href="https://arxiv.org/pdf/2502.12802">pdf</a>, <a href="https://arxiv.org/format/2502.12802">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PPGF: Probability Pattern-Guided Time Series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yanru Sun</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zongxia Xie</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haoyu Xing</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hualong Yu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Q">Qinghua Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12802v1-abstract-short" style="display: inline;"> Time series forecasting (TSF) is an essential branch of machine learning with various applications. Most methods for TSF focus on constructing different networks to extract better information and improve performance. However, practical application data contain different internal mechanisms, resulting in a mixture of multiple patterns. That is, the model's ability to fit different patterns is diffe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12802v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12802v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12802v1-abstract-full" style="display: none;"> Time series forecasting (TSF) is an essential branch of machine learning with various applications. Most methods for TSF focus on constructing different networks to extract better information and improve performance. However, practical application data contain different internal mechanisms, resulting in a mixture of multiple patterns. That is, the model's ability to fit different patterns is different and generates different errors. In order to solve this problem, we propose an end-to-end framework, namely probability pattern-guided time series forecasting (PPGF). PPGF reformulates the TSF problem as a forecasting task guided by probabilistic pattern classification. Firstly, we propose the grouping strategy to approach forecasting problems as classification and alleviate the impact of data imbalance on classification. Secondly, we predict in the corresponding class interval to guarantee the consistency of classification and forecasting. In addition, True Class Probability (TCP) is introduced to pay more attention to the difficult samples to improve the classification accuracy. Detailedly, PPGF classifies the different patterns to determine which one the target value may belong to and estimates it accurately in the corresponding interval. To demonstrate the effectiveness of the proposed framework, we conduct extensive experiments on real-world datasets, and PPGF achieves significant performance improvements over several baseline methods. Furthermore, the effectiveness of TCP and the necessity of consistency between classification and forecasting are proved in the experiments. All data and codes are available online: https://github.com/syrGitHub/PPGF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12802v1-abstract-full').style.display = 'none'; document.getElementById('2502.12802v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16362">arXiv:2501.16362</a> <span> [<a href="https://arxiv.org/pdf/2501.16362">pdf</a>, <a href="https://arxiv.org/format/2501.16362">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Fluid Dynamics">physics.flu-dyn</span> </div> </div> <p class="title is-5 mathjax"> A novel Trunk Branch-net PINN for flow and heat transfer prediction in porous medium </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haoyun Xing</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+K">Kaiyan Jin</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+G">Guice Yao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jin Zhao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Dichu Xu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+D">Dongsheng Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16362v1-abstract-short" style="display: inline;"> A novel Trunk-Branch (TB)-net physics-informed neural network (PINN) architecture is developed, which is a PINN-based method incorporating trunk and branch nets to capture both global and local features. The aim is to solve four main classes of problems: forward flow problem, forward heat transfer problem, inverse heat transfer problem, and transfer learning problem within the porous medium, which… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16362v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16362v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16362v1-abstract-full" style="display: none;"> A novel Trunk-Branch (TB)-net physics-informed neural network (PINN) architecture is developed, which is a PINN-based method incorporating trunk and branch nets to capture both global and local features. The aim is to solve four main classes of problems: forward flow problem, forward heat transfer problem, inverse heat transfer problem, and transfer learning problem within the porous medium, which are notoriously complex that could not be handled by origin PINN. In the proposed TB-net PINN architecture, a Fully-connected Neural Network (FNN) is used as the trunk net, followed by separated FNNs as the branch nets with respect to outputs, and automatic differentiation is performed for partial derivatives of outputs with respect to inputs by considering various physical loss. The effectiveness and flexibility of the novel TB-net PINN architecture is demonstrated through a collection of forward problems, and transfer learning validates the feasibility of resource reuse. Combining with the superiority over traditional numerical methods in solving inverse problems, the proposed TB-net PINN shows its great potential for practical engineering applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16362v1-abstract-full').style.display = 'none'; document.getElementById('2501.16362v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 17 figures,</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14544">arXiv:2501.14544</a> <span> [<a href="https://arxiv.org/pdf/2501.14544">pdf</a>, <a href="https://arxiv.org/format/2501.14544">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Distributed Conformal Prediction via Message Passing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14544v1-abstract-short" style="display: inline;"> Post-hoc calibration of pre-trained models is critical for ensuring reliable inference, especially in safety-critical domains such as healthcare. Conformal Prediction (CP) offers a robust post-hoc calibration framework, providing distribution-free statistical coverage guarantees for prediction sets by leveraging held-out datasets. In this work, we address a decentralized setting where each device… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14544v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14544v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14544v1-abstract-full" style="display: none;"> Post-hoc calibration of pre-trained models is critical for ensuring reliable inference, especially in safety-critical domains such as healthcare. Conformal Prediction (CP) offers a robust post-hoc calibration framework, providing distribution-free statistical coverage guarantees for prediction sets by leveraging held-out datasets. In this work, we address a decentralized setting where each device has limited calibration data and can communicate only with its neighbors over an arbitrary graph topology. We propose two message-passing-based approaches for achieving reliable inference via CP: quantile-based distributed conformal prediction (Q-DCP) and histogram-based distributed conformal prediction (H-DCP). Q-DCP employs distributed quantile regression enhanced with tailored smoothing and regularization terms to accelerate convergence, while H-DCP uses a consensus-based histogram estimation approach. Through extensive experiments, we investigate the trade-offs between hyperparameter tuning requirements, communication overhead, coverage guarantees, and prediction set sizes across different network topologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14544v1-abstract-full').style.display = 'none'; document.getElementById('2501.14544v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 11 figures, submitted for posssible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11015">arXiv:2501.11015</a> <span> [<a href="https://arxiv.org/pdf/2501.11015">pdf</a>, <a href="https://arxiv.org/format/2501.11015">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Wireless Control over Edge Networks: Joint User Association and Communication-Computation Co-Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhilin Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiyang Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ye Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11015v1-abstract-short" style="display: inline;"> This paper studies a wireless networked control system with multiple base stations (BSs) cooperatively coordinating the wireless control of a number of subsystems each consisting of a plant, a sensor, and an actuator. In this system, each sensor first offloads the sensing data to its associated BS, which then employs mobile edge computing (MEC) to process the data and sends the command signals bac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11015v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11015v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11015v1-abstract-full" style="display: none;"> This paper studies a wireless networked control system with multiple base stations (BSs) cooperatively coordinating the wireless control of a number of subsystems each consisting of a plant, a sensor, and an actuator. In this system, each sensor first offloads the sensing data to its associated BS, which then employs mobile edge computing (MEC) to process the data and sends the command signals back to the actuator for remote control. We consider the time-division-multiple-access (TDMA) service protocol among different BSs to facilitate the cascaded communication and computation process, in which different BSs implement the uplink data collection and downlink command broadcasting over orthogonal time slots. We also employ the massive multiple-input multiple-output (MIMO) at BSs, based on which each BS serves its associated sensors or actuators over the same time-frequency resources via spatial multiplexing. Under this setup, we jointly design the association between BSs and sensors/actuators as well as the joint communication and computation resource allocation, with the objective of minimizing the closed-loop control latency of the multiple subsystems while ensuring their control stability. The optimization takes into account the transmission uncertainty caused by both the hyper reliable and low-latency communications (HRLLC) and the inter-user interference , as well as the communication and computation resource constraints at distributed nodes. To solve the challenging non-convex joint optimization problem, we develop an efficient algorithm by employing the techniques of alternating optimization and successive convex approximation (SCA). Numerical results show that the proposed joint BS-sensor/actuator association and resource allocation design significantly outperforms other heuristic schemes and frequency-division-multiple-access (FDMA) counterpart. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11015v1-abstract-full').style.display = 'none'; document.getElementById('2501.11015v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06038">arXiv:2501.06038</a> <span> [<a href="https://arxiv.org/pdf/2501.06038">pdf</a>, <a href="https://arxiv.org/format/2501.06038">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Holistically Point-guided Text Framework for Weakly-Supervised Camouflaged Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mok%2C+T+Q">Tsui Qin Mok</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyong Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haozhe Xing</a>, <a href="/search/cs?searchtype=author&query=He%2C+M">Miaoyang He</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06038v1-abstract-short" style="display: inline;"> Weakly-Supervised Camouflaged Object Detection (WSCOD) has gained popularity for its promise to train models with weak labels to segment objects that visually blend into their surroundings. Recently, some methods using sparsely-annotated supervision shown promising results through scribbling in WSCOD, while point-text supervision remains underexplored. Hence, this paper introduces a novel holistic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06038v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06038v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06038v1-abstract-full" style="display: none;"> Weakly-Supervised Camouflaged Object Detection (WSCOD) has gained popularity for its promise to train models with weak labels to segment objects that visually blend into their surroundings. Recently, some methods using sparsely-annotated supervision shown promising results through scribbling in WSCOD, while point-text supervision remains underexplored. Hence, this paper introduces a novel holistically point-guided text framework for WSCOD by decomposing into three phases: segment, choose, train. Specifically, we propose Point-guided Candidate Generation (PCG), where the point's foreground serves as a correction for the text path to explicitly correct and rejuvenate the loss detection object during the mask generation process (SEGMENT). We also introduce a Qualified Candidate Discriminator (QCD) to choose the optimal mask from a given text prompt using CLIP (CHOOSE), and employ the chosen pseudo mask for training with a self-supervised Vision Transformer (TRAIN). Additionally, we developed a new point-supervised dataset (P2C-COD) and a text-supervised dataset (T-COD). Comprehensive experiments on four benchmark datasets demonstrate our method outperforms state-of-the-art methods by a large margin, and also outperforms some existing fully-supervised camouflaged object detection methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06038v1-abstract-full').style.display = 'none'; document.getElementById('2501.06038v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02523">arXiv:2501.02523</a> <span> [<a href="https://arxiv.org/pdf/2501.02523">pdf</a>, <a href="https://arxiv.org/format/2501.02523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Face-MakeUp: Multimodal Facial Prompts for Text-to-Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+D">Dawei Dai</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+M">Mingming Jia</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yinxiu Zhou</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hang Xing</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chenghang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02523v1-abstract-short" style="display: inline;"> Facial images have extensive practical applications. Although the current large-scale text-image diffusion models exhibit strong generation capabilities, it is challenging to generate the desired facial images using only text prompt. Image prompts are a logical choice. However, current methods of this type generally focus on general domain. In this paper, we aim to optimize image makeup techniques… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02523v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02523v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02523v1-abstract-full" style="display: none;"> Facial images have extensive practical applications. Although the current large-scale text-image diffusion models exhibit strong generation capabilities, it is challenging to generate the desired facial images using only text prompt. Image prompts are a logical choice. However, current methods of this type generally focus on general domain. In this paper, we aim to optimize image makeup techniques to generate the desired facial images. Specifically, (1) we built a dataset of 4 million high-quality face image-text pairs (FaceCaptionHQ-4M) based on LAION-Face to train our Face-MakeUp model; (2) to maintain consistency with the reference facial image, we extract/learn multi-scale content features and pose features for the facial image, integrating these into the diffusion model to enhance the preservation of facial identity features for diffusion models. Validation on two face-related test datasets demonstrates that our Face-MakeUp can achieve the best comprehensive performance.All codes are available at:https://github.com/ddw2AIGROUP2CQUPT/Face-MakeUp <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02523v1-abstract-full').style.display = 'none'; document.getElementById('2501.02523v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11479">arXiv:2412.11479</a> <span> [<a href="https://arxiv.org/pdf/2412.11479">pdf</a>, <a href="https://arxiv.org/format/2412.11479">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/GCWkshps58843.2023.10464958">10.1109/GCWkshps58843.2023.10464958 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Wireless Environmental Information Theory: A New Paradigm towards 6G Online and Proactive Environment Intelligence Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Li Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaoyi Liu</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yichen Cai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hongbo Xing</a>, <a href="/search/cs?searchtype=author&query=jiang%2C+T">Tao jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11479v1-abstract-short" style="display: inline;"> The channel is one of the five critical components of a communication system, and its ergodic capacity is based on all realizations of statistic channel model. This statistical paradigm has successfully guided the design of mobile communication systems from 1G to 5G. However, this approach relies on offline channel measurements in specific environments, and the system passively adapts to new envir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11479v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11479v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11479v1-abstract-full" style="display: none;"> The channel is one of the five critical components of a communication system, and its ergodic capacity is based on all realizations of statistic channel model. This statistical paradigm has successfully guided the design of mobile communication systems from 1G to 5G. However, this approach relies on offline channel measurements in specific environments, and the system passively adapts to new environments, resulting in deviation from the optimal performance. With the pursuit of higher capacity and data rate of 6G, especially facing the ubiquitous environments, there is an urgent need for a new paradigm to combat the randomness of channel, i.e., more proactive and online manner. Motivated by this, we propose an environment intelligence communication (EIC) based on wireless environmental information theory (WEIT) for 6G. The proposed EIC architecture is composed of three steps: Firstly, wireless environmental information (WEI) is acquired using sensing techniques. Then, leveraging WEI and channel data, AI techniques are employed to predict channel fading, thereby mitigating channel uncertainty. Thirdly, the communication system autonomously determines the optimal air-interface transmission strategy based on real-time channel predictions, enabling intelligent interaction with the physical environment. To make this attractive paradigm shift from theory to practice, we answer three key problems to establish WEIT for the first time. How should WEI be defined? Can it be quantified? Does it hold the same properties as statistical communication information? Furthermore, EIC aided by WEI (EIC-WEI) is validated across multiple air-interface tasks, including CSI prediction, beam prediction, and radio resource management. Simulation results demonstrate that the proposed EIC-WEI significantly outperforms the statistical paradigm in decreasing overhead and performance optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11479v1-abstract-full').style.display = 'none'; document.getElementById('2412.11479v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07428">arXiv:2412.07428</a> <span> [<a href="https://arxiv.org/pdf/2412.07428">pdf</a>, <a href="https://arxiv.org/format/2412.07428">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> When UAV Meets Federated Learning: Latency Minimization via Joint Trajectory Design and Resource Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenchao Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+G">Gui Gui</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yanyan Shen</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07428v1-abstract-short" style="display: inline;"> Federated learning (FL) has emerged as a pivotal solution for training machine learning models over wireless networks, particularly for Internet of Things (IoT) devices with limited computation resources. Despite its benefits, the efficiency of FL is often restricted by the communication quality between IoT devices and the central server. To address this issue, we introduce an innovative approach… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07428v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07428v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07428v1-abstract-full" style="display: none;"> Federated learning (FL) has emerged as a pivotal solution for training machine learning models over wireless networks, particularly for Internet of Things (IoT) devices with limited computation resources. Despite its benefits, the efficiency of FL is often restricted by the communication quality between IoT devices and the central server. To address this issue, we introduce an innovative approach by deploying an unmanned aerial vehicle (UAV) as a mobile FL server to enhance the training process of FL. By leveraging the UAV's maneuverability, we establish robust line-of-sight connections with IoT devices, significantly improving communication capacity. To improve the overall training efficiency, we formulate a latency minimization problem by jointly optimizing the bandwidth allocation, computing frequencies, transmit power for both the UAV and IoT devices, and the UAV's trajectory. Then, an efficient alternating optimization algorithm is developed to solve it efficiently. Furthermore, we analyze the convergence and computational complexity of the proposed algorithm. Finally, numerical results demonstrate that our proposed scheme not only outperforms existing benchmark schemes in terms of latency but also achieves training efficiency that closely approximate the ideal scenario. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07428v1-abstract-full').style.display = 'none'; document.getElementById('2412.07428v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This manuscript has been submitted to IEEE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06325">arXiv:2412.06325</a> <span> [<a href="https://arxiv.org/pdf/2412.06325">pdf</a>, <a href="https://arxiv.org/format/2412.06325">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Q-PnV: A Quantum Consensus Mechanism for Security Consortium Blockchains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jianming Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hui Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hongjian Xing</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+R">Runhuai Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Weixiang Huang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+S">Shaowen Deng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanping Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+W">Weimin Zeng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+P">Ping Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+T">Tao Sun</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xiongyan Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06325v1-abstract-short" style="display: inline;"> Due to the rapid development of quantum computing, many classical blockchain technologies are now considered insecure. The emergence of quantum blockchain holds promise for addressing this issue. Various quantum consensus algorithms have been proposed so far, but there has not yet been a quantum consensus algorithm tailored specifically for consortium blockchain scenarios. In this paper, we propos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06325v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06325v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06325v1-abstract-full" style="display: none;"> Due to the rapid development of quantum computing, many classical blockchain technologies are now considered insecure. The emergence of quantum blockchain holds promise for addressing this issue. Various quantum consensus algorithms have been proposed so far, but there has not yet been a quantum consensus algorithm tailored specifically for consortium blockchain scenarios. In this paper, we propose a novel quantum consensus mechanism, named Q-PnV. This consensus mechanism is based on the classical Proof of Vote (PoV), integrating quantum voting, quantum digital signature and quantum random number generators (QRNGs). By combining Q-PnV with a quantum blockchain using weighted hypergraph states, we propose a comprehensive quantum blockchain solution for consortium blockchain scenarios. Compared to the classical method, the quantum blockchain based on Q-PnV can resist quantum attacks and shows significant improvements in security and fairness, making it better suit-ed for the future quantum era. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06325v1-abstract-full').style.display = 'none'; document.getElementById('2412.06325v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13000">arXiv:2411.13000</a> <span> [<a href="https://arxiv.org/pdf/2411.13000">pdf</a>, <a href="https://arxiv.org/format/2411.13000">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> NCAirFL: CSI-Free Over-the-Air Federated Learning Based on Non-Coherent Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Michelusi%2C+N">Nicol貌 Michelusi</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13000v1-abstract-short" style="display: inline;"> Over-the-air federated learning (FL), i.e., AirFL, leverages computing primitively over multiple access channels. A long-standing challenge in AirFL is to achieve coherent signal alignment without relying on expensive channel estimation and feedback. This paper proposes NCAirFL, a CSI-free AirFL scheme based on unbiased non-coherent detection at the edge server. By exploiting binary dithering and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13000v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13000v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13000v1-abstract-full" style="display: none;"> Over-the-air federated learning (FL), i.e., AirFL, leverages computing primitively over multiple access channels. A long-standing challenge in AirFL is to achieve coherent signal alignment without relying on expensive channel estimation and feedback. This paper proposes NCAirFL, a CSI-free AirFL scheme based on unbiased non-coherent detection at the edge server. By exploiting binary dithering and a long-term memory based error-compensation mechanism, NCAirFL achieves a convergence rate of order $\mathcal{O}(1/\sqrt{T})$ in terms of the average square norm of the gradient for general non-convex and smooth objectives, where $T$ is the number of communication rounds. Experiments demonstrate the competitive performance of NCAirFL compared to vanilla FL with ideal communications and to coherent transmission-based benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13000v1-abstract-full').style.display = 'none'; document.getElementById('2411.13000v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures, submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07722">arXiv:2411.07722</a> <span> [<a href="https://arxiv.org/pdf/2411.07722">pdf</a>, <a href="https://arxiv.org/format/2411.07722">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Is Cognition consistent with Perception? Assessing and Mitigating Multimodal Knowledge Conflicts in Document Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zirui Shao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+C">Chuwei Luo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zhaoqing Zhu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+J">Jiajun Bu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07722v1-abstract-short" style="display: inline;"> Multimodal large language models (MLLMs) have shown impressive capabilities in document understanding, a rapidly growing research area with significant industrial demand in recent years. As a multimodal task, document understanding requires models to possess both perceptual and cognitive abilities. However, current MLLMs often face conflicts between perception and cognition. Taking a document VQA… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07722v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07722v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07722v1-abstract-full" style="display: none;"> Multimodal large language models (MLLMs) have shown impressive capabilities in document understanding, a rapidly growing research area with significant industrial demand in recent years. As a multimodal task, document understanding requires models to possess both perceptual and cognitive abilities. However, current MLLMs often face conflicts between perception and cognition. Taking a document VQA task (cognition) as an example, an MLLM might generate answers that do not match the corresponding visual content identified by its OCR (perception). This conflict suggests that the MLLM might struggle to establish an intrinsic connection between the information it "sees" and what it "understands." Such conflicts challenge the intuitive notion that cognition is consistent with perception, hindering the performance and explainability of MLLMs. In this paper, we define the conflicts between cognition and perception as Cognition and Perception (C&P) knowledge conflicts, a form of multimodal knowledge conflicts, and systematically assess them with a focus on document understanding. Our analysis reveals that even GPT-4o, a leading MLLM, achieves only 68.6% C&P consistency. To mitigate the C&P knowledge conflicts, we propose a novel method called Multimodal Knowledge Consistency Fine-tuning. This method first ensures task-specific consistency and then connects the cognitive and perceptual knowledge. Our method significantly reduces C&P knowledge conflicts across all tested MLLMs and enhances their performance in both cognitive and perceptual tasks in most scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07722v1-abstract-full').style.display = 'none'; document.getElementById('2411.07722v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15044">arXiv:2410.15044</a> <span> [<a href="https://arxiv.org/pdf/2410.15044">pdf</a>, <a href="https://arxiv.org/format/2410.15044">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Adanonymizer: Interactively Navigating and Balancing the Duality of Privacy and Output Performance in Human-LLM Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuning Zhang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xin Yi</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haobin Xing</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+L">Lyumanshan Ye</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yongquan Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hewu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15044v2-abstract-short" style="display: inline;"> Current Large Language Models (LLMs) cannot support users to precisely balance privacy protection and output performance during individual consultations. We introduce Adanonymizer, an anonymization plug-in that allows users to control this balance by navigating a trade-off curve. A survey (N=221) revealed a privacy paradox, where users frequently disclosed sensitive information despite acknowledgi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15044v2-abstract-full').style.display = 'inline'; document.getElementById('2410.15044v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15044v2-abstract-full" style="display: none;"> Current Large Language Models (LLMs) cannot support users to precisely balance privacy protection and output performance during individual consultations. We introduce Adanonymizer, an anonymization plug-in that allows users to control this balance by navigating a trade-off curve. A survey (N=221) revealed a privacy paradox, where users frequently disclosed sensitive information despite acknowledging privacy risks. The study further demonstrated that privacy risks were not significantly correlated with model output performance, highlighting the potential to navigate this trade-off. Adanonymizer normalizes privacy and utility ratings by type and automates the pseudonymization of sensitive terms based on user preferences, significantly reducing user effort. Its 2D color palette interface visualizes the privacy-utility trade-off, allowing users to adjust the balance by manipulating a point. An evaluation (N=36) compared Adanonymizer with ablation methods and differential privacy techniques, where Adanonymizer significantly reduced modification time, achieved better perceived model performance and overall user preference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15044v2-abstract-full').style.display = 'none'; document.getElementById('2410.15044v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14931">arXiv:2410.14931</a> <span> [<a href="https://arxiv.org/pdf/2410.14931">pdf</a>, <a href="https://arxiv.org/format/2410.14931">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> "Ghost of the past": identifying and resolving privacy leakage from LLM's memory through proactive user interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuning Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+L">Lyumanshan Ye</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xin Yi</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jingyu Tang</a>, <a href="/search/cs?searchtype=author&query=Shui%2C+B">Bo Shui</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haobin Xing</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pengfei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hewu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14931v1-abstract-short" style="display: inline;"> Memories, encompassing past inputs in context window and retrieval-augmented generation (RAG), frequently surface during human-LLM interactions, yet users are often unaware of their presence and the associated privacy risks. To address this, we propose MemoAnalyzer, a system for identifying, visualizing, and managing private information within memories. A semi-structured interview (N=40) revealed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14931v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14931v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14931v1-abstract-full" style="display: none;"> Memories, encompassing past inputs in context window and retrieval-augmented generation (RAG), frequently surface during human-LLM interactions, yet users are often unaware of their presence and the associated privacy risks. To address this, we propose MemoAnalyzer, a system for identifying, visualizing, and managing private information within memories. A semi-structured interview (N=40) revealed that low privacy awareness was the primary challenge, while proactive privacy control emerged as the most common user need. MemoAnalyzer uses a prompt-based method to infer and identify sensitive information from aggregated past inputs, allowing users to easily modify sensitive content. Background color temperature and transparency are mapped to inference confidence and sensitivity, streamlining privacy adjustments. A 5-day evaluation (N=36) comparing MemoAnalyzer with the default GPT setting and a manual modification baseline showed MemoAnalyzer significantly improved privacy awareness and protection without compromising interaction speed. Our study contributes to privacy-conscious LLM design, offering insights into privacy protection for Human-AI interactions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14931v1-abstract-full').style.display = 'none'; document.getElementById('2410.14931v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10899">arXiv:2410.10899</a> <span> [<a href="https://arxiv.org/pdf/2410.10899">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GPTON: Generative Pre-trained Transformers enhanced with Ontology Narration for accurate annotation of biological data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+R">Rongbin Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenbo Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jinbo Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hanwen Xing</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hua Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhao Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W+J">W. Jim Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10899v2-abstract-short" style="display: inline;"> By leveraging GPT-4 for ontology narration, we developed GPTON to infuse structured knowledge into LLMs through verbalized ontology terms, achieving accurate text and ontology annotations for over 68% of gene sets in the top five predictions. Manual evaluations confirm GPTON's robustness, highlighting its potential to harness LLMs and structured knowledge to significantly advance biomedical resear… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10899v2-abstract-full').style.display = 'inline'; document.getElementById('2410.10899v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10899v2-abstract-full" style="display: none;"> By leveraging GPT-4 for ontology narration, we developed GPTON to infuse structured knowledge into LLMs through verbalized ontology terms, achieving accurate text and ontology annotations for over 68% of gene sets in the top five predictions. Manual evaluations confirm GPTON's robustness, highlighting its potential to harness LLMs and structured knowledge to significantly advance biomedical research beyond gene set annotation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10899v2-abstract-full').style.display = 'none'; document.getElementById('2410.10899v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 6 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.3; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07917">arXiv:2410.07917</a> <span> [<a href="https://arxiv.org/pdf/2410.07917">pdf</a>, <a href="https://arxiv.org/ps/2410.07917">ps</a>, <a href="https://arxiv.org/format/2410.07917">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Understanding Human Activity with Uncertainty Measure for Novelty in Graph Convolutional Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07917v1-abstract-short" style="display: inline;"> Understanding human activity is a crucial aspect of developing intelligent robots, particularly in the domain of human-robot collaboration. Nevertheless, existing systems encounter challenges such as over-segmentation, attributed to errors in the up-sampling process of the decoder. In response, we introduce a promising solution: the Temporal Fusion Graph Convolutional Network. This innovative appr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07917v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07917v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07917v1-abstract-full" style="display: none;"> Understanding human activity is a crucial aspect of developing intelligent robots, particularly in the domain of human-robot collaboration. Nevertheless, existing systems encounter challenges such as over-segmentation, attributed to errors in the up-sampling process of the decoder. In response, we introduce a promising solution: the Temporal Fusion Graph Convolutional Network. This innovative approach aims to rectify the inadequate boundary estimation of individual actions within an activity stream and mitigate the issue of over-segmentation in the temporal dimension. Moreover, systems leveraging human activity recognition frameworks for decision-making necessitate more than just the identification of actions. They require a confidence value indicative of the certainty regarding the correspondence between observations and training examples. This is crucial to prevent overly confident responses to unforeseen scenarios that were not part of the training data and may have resulted in mismatches due to weak similarity measures within the system. To address this, we propose the incorporation of a Spectral Normalized Residual connection aimed at enhancing efficient estimation of novelty in observations. This innovative approach ensures the preservation of input distance within the feature space by imposing constraints on the maximum gradients of weight updates. By limiting these gradients, we promote a more robust handling of novel situations, thereby mitigating the risks associated with overconfidence. Our methodology involves the use of a Gaussian process to quantify the distance in feature space. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07917v1-abstract-full').style.display = 'none'; document.getElementById('2410.07917v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 10 figures, The International Journal of Robotics Research</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07912">arXiv:2410.07912</a> <span> [<a href="https://arxiv.org/pdf/2410.07912">pdf</a>, <a href="https://arxiv.org/ps/2410.07912">ps</a>, <a href="https://arxiv.org/format/2410.07912">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Understanding Spatio-Temporal Relations in Human-Object Interaction using Pyramid Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07912v1-abstract-short" style="display: inline;"> Human activities recognition is an important task for an intelligent robot, especially in the field of human-robot collaboration, it requires not only the label of sub-activities but also the temporal structure of the activity. In order to automatically recognize both the label and the temporal structure in sequence of human-object interaction, we propose a novel Pyramid Graph Convolutional Networ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07912v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07912v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07912v1-abstract-full" style="display: none;"> Human activities recognition is an important task for an intelligent robot, especially in the field of human-robot collaboration, it requires not only the label of sub-activities but also the temporal structure of the activity. In order to automatically recognize both the label and the temporal structure in sequence of human-object interaction, we propose a novel Pyramid Graph Convolutional Network (PGCN), which employs a pyramidal encoder-decoder architecture consisting of an attention based graph convolution network and a temporal pyramid pooling module for downsampling and upsampling interaction sequence on the temporal axis, respectively. The system represents the 2D or 3D spatial relation of human and objects from the detection results in video data as a graph. To learn the human-object relations, a new attention graph convolutional network is trained to extract condensed information from the graph representation. To segment action into sub-actions, a novel temporal pyramid pooling module is proposed, which upsamples compressed features back to the original time scale and classifies actions per frame. We explore various attention layers, namely spatial attention, temporal attention and channel attention, and combine different upsampling decoders to test the performance on action recognition and segmentation. We evaluate our model on two challenging datasets in the field of human-object interaction recognition, i.e. Bimanual Actions and IKEA Assembly datasets. We demonstrate that our classifier significantly improves both framewise action recognition and segmentation, e.g., F1 micro and F1@50 scores on Bimanual Actions dataset are improved by $4.3\%$ and $8.5\%$ respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07912v1-abstract-full').style.display = 'none'; document.getElementById('2410.07912v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures, IROS 2022 conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03530">arXiv:2410.03530</a> <span> [<a href="https://arxiv.org/pdf/2410.03530">pdf</a>, <a href="https://arxiv.org/format/2410.03530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> PRF: Parallel Resonate and Fire Neuron for Long Sequence Learning in Spiking Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yulong Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zunchang Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Changchun Feng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xiaopeng Lin</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+H">Hongwei Ren</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+H">Haotian Fu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yue Zhou</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+B">Bojun Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03530v2-abstract-short" style="display: inline;"> Recently, there is growing demand for effective and efficient long sequence modeling, with State Space Models (SSMs) proving to be effective for long sequence tasks. To further reduce energy consumption, SSMs can be adapted to Spiking Neural Networks (SNNs) using spiking functions. However, current spiking-formalized SSMs approaches still rely on float-point matrix-vector multiplication during inf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03530v2-abstract-full').style.display = 'inline'; document.getElementById('2410.03530v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03530v2-abstract-full" style="display: none;"> Recently, there is growing demand for effective and efficient long sequence modeling, with State Space Models (SSMs) proving to be effective for long sequence tasks. To further reduce energy consumption, SSMs can be adapted to Spiking Neural Networks (SNNs) using spiking functions. However, current spiking-formalized SSMs approaches still rely on float-point matrix-vector multiplication during inference, undermining SNNs' energy advantage. In this work, we address the efficiency and performance challenges of long sequence learning in SNNs simultaneously. First, we propose a decoupled reset method for parallel spiking neuron training, reducing the typical Leaky Integrate-and-Fire (LIF) model's training time from $O(L^2)$ to $O(L\log L)$, effectively speeding up the training by $6.57 \times$ to $16.50 \times$ on sequence lengths $1,024$ to $32,768$. To our best knowledge, this is the first time that parallel computation with a reset mechanism is implemented achieving equivalence to its sequential counterpart. Secondly, to capture long-range dependencies, we propose a Parallel Resonate and Fire (PRF) neuron, which leverages an oscillating membrane potential driven by a resonate mechanism from a differentiable reset function in the complex domain. The PRF enables efficient long sequence learning while maintaining parallel training. Finally, we demonstrate that the proposed spike-driven architecture using PRF achieves performance comparable to Structured SSMs (S4), with two orders of magnitude reduction in energy consumption, outperforming Transformer on Long Range Arena tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03530v2-abstract-full').style.display = 'none'; document.getElementById('2410.03530v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2208.04933 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04649">arXiv:2408.04649</a> <span> [<a href="https://arxiv.org/pdf/2408.04649">pdf</a>, <a href="https://arxiv.org/format/2408.04649">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Chain of Stance: Stance Detection with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junxia Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changjiang Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hanwen Xing</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dongming Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yazhou Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04649v1-abstract-short" style="display: inline;"> Stance detection is an active task in natural language processing (NLP) that aims to identify the author's stance towards a particular target within a text. Given the remarkable language understanding capabilities and encyclopedic prior knowledge of large language models (LLMs), how to explore the potential of LLMs in stance detection has received significant attention. Unlike existing LLM-based a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04649v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04649v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04649v1-abstract-full" style="display: none;"> Stance detection is an active task in natural language processing (NLP) that aims to identify the author's stance towards a particular target within a text. Given the remarkable language understanding capabilities and encyclopedic prior knowledge of large language models (LLMs), how to explore the potential of LLMs in stance detection has received significant attention. Unlike existing LLM-based approaches that focus solely on fine-tuning with large-scale datasets, we propose a new prompting method, called \textit{Chain of Stance} (CoS). In particular, it positions LLMs as expert stance detectors by decomposing the stance detection process into a series of intermediate, stance-related assertions that culminate in the final judgment. This approach leads to significant improvements in classification performance. We conducted extensive experiments using four SOTA LLMs on the SemEval 2016 dataset, covering the zero-shot and few-shot learning setups. The results indicate that the proposed method achieves state-of-the-art results with an F1 score of 79.84 in the few-shot setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04649v1-abstract-full').style.display = 'none'; document.getElementById('2408.04649v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02047">arXiv:2408.02047</a> <span> [<a href="https://arxiv.org/pdf/2408.02047">pdf</a>, <a href="https://arxiv.org/format/2408.02047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Latency-Aware Resource Allocation for Mobile Edge Generation and Computing via Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yinyu Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yanyan Shen</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02047v2-abstract-short" style="display: inline;"> Recently, the integration of mobile edge computing (MEC) and generative artificial intelligence (GAI) technology has given rise to a new area called mobile edge generation and computing (MEGC), which offers mobile users heterogeneous services such as task computing and content generation. In this letter, we investigate the joint communication, computation, and the AIGC resource allocation problem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02047v2-abstract-full').style.display = 'inline'; document.getElementById('2408.02047v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02047v2-abstract-full" style="display: none;"> Recently, the integration of mobile edge computing (MEC) and generative artificial intelligence (GAI) technology has given rise to a new area called mobile edge generation and computing (MEGC), which offers mobile users heterogeneous services such as task computing and content generation. In this letter, we investigate the joint communication, computation, and the AIGC resource allocation problem in an MEGC system. A latency minimization problem is first formulated to enhance the quality of service for mobile users. Due to the strong coupling of the optimization variables, we propose a new deep reinforcement learning-based algorithm to solve it efficiently. Numerical results demonstrate that the proposed algorithm can achieve lower latency than two baseline algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02047v2-abstract-full').style.display = 'none'; document.getElementById('2408.02047v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 6 figures. This paper has been accepted for publication by IEEE Networking Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19721">arXiv:2407.19721</a> <span> [<a href="https://arxiv.org/pdf/2407.19721">pdf</a>, <a href="https://arxiv.org/format/2407.19721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Rina: Enhancing Ring-AllReduce with In-network Aggregation in Distributed Model Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zixuan Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuandong Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Minglin Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yinfan Hu</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+H">Hao Mei</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huifeng Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wanxin Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sen Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19721v1-abstract-short" style="display: inline;"> Parameter Server (PS) and Ring-AllReduce (RAR) are two widely utilized synchronization architectures in multi-worker Deep Learning (DL), also referred to as Distributed Deep Learning (DDL). However, PS encounters challenges with the ``incast'' issue, while RAR struggles with problems caused by the long dependency chain. The emerging In-network Aggregation (INA) has been proposed to integrate with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19721v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19721v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19721v1-abstract-full" style="display: none;"> Parameter Server (PS) and Ring-AllReduce (RAR) are two widely utilized synchronization architectures in multi-worker Deep Learning (DL), also referred to as Distributed Deep Learning (DDL). However, PS encounters challenges with the ``incast'' issue, while RAR struggles with problems caused by the long dependency chain. The emerging In-network Aggregation (INA) has been proposed to integrate with PS to mitigate its incast issue. However, such PS-based INA has poor incremental deployment abilities as it requires replacing all the switches to show significant performance improvement, which is not cost-effective. In this study, we present the incorporation of INA capabilities into RAR, called RAR with In-Network Aggregation (Rina), to tackle both the problems above. Rina features its agent-worker mechanism. When an INA-capable ToR switch is deployed, all workers in this rack run as one abstracted worker with the help of the agent, resulting in both excellent incremental deployment capabilities and better throughput. We conducted extensive testbed and simulation evaluations to substantiate the throughput advantages of Rina over existing DDL training synchronization structures. Compared with the state-of-the-art PS-based INA methods ATP, Rina can achieve more than 50\% throughput with the same hardware cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19721v1-abstract-full').style.display = 'none'; document.getElementById('2407.19721v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ICNP 2024. Preview version only</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15502">arXiv:2407.15502</a> <span> [<a href="https://arxiv.org/pdf/2407.15502">pdf</a>, <a href="https://arxiv.org/format/2407.15502">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> WebRPG: Automatic Web Rendering Parameters Generation for Visual Presentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zirui Shao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+F">Feiyu Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zepeng Zhu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+J">Jiajun Bu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+C">Cong Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15502v1-abstract-short" style="display: inline;"> In the era of content creation revolution propelled by advancements in generative models, the field of web design remains unexplored despite its critical role in modern digital communication. The web design process is complex and often time-consuming, especially for those with limited expertise. In this paper, we introduce Web Rendering Parameters Generation (WebRPG), a new task that aims at autom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15502v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15502v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15502v1-abstract-full" style="display: none;"> In the era of content creation revolution propelled by advancements in generative models, the field of web design remains unexplored despite its critical role in modern digital communication. The web design process is complex and often time-consuming, especially for those with limited expertise. In this paper, we introduce Web Rendering Parameters Generation (WebRPG), a new task that aims at automating the generation for visual presentation of web pages based on their HTML code. WebRPG would contribute to a faster web development workflow. Since there is no existing benchmark available, we develop a new dataset for WebRPG through an automated pipeline. Moreover, we present baseline models, utilizing VAE to manage numerous elements and rendering parameters, along with custom HTML embedding for capturing essential semantic and hierarchical information from HTML. Extensive experiments, including customized quantitative evaluations for this specific task, are conducted to evaluate the quality of the generated results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15502v1-abstract-full').style.display = 'none'; document.getElementById('2407.15502v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ECCV 2024. The dataset and code can be accessed at https://github.com/AlibabaResearch/AdvancedLiterateMachinery/tree/main/DocumentUnderstanding/WebRPG</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07245">arXiv:2407.07245</a> <span> [<a href="https://arxiv.org/pdf/2407.07245">pdf</a>, <a href="https://arxiv.org/format/2407.07245">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Mobile Edge Generation (MEG) by Constrained Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaoxia Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuanwei Liu</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+X">Xidong Mu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Nallanathan%2C+A">Arumugam Nallanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07245v2-abstract-short" style="display: inline;"> A novel accelerated mobile edge generation (MEG) framework is proposed for generating high-resolution images on mobile devices. Exploiting a large-scale latent diffusion model (LDM) distributed across edge server (ES) and user equipment (UE), cost-efficient artificial intelligence generated content (AIGC) is achieved by transmitting low-dimensional features between ES and UE. To reduce overheads o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07245v2-abstract-full').style.display = 'inline'; document.getElementById('2407.07245v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07245v2-abstract-full" style="display: none;"> A novel accelerated mobile edge generation (MEG) framework is proposed for generating high-resolution images on mobile devices. Exploiting a large-scale latent diffusion model (LDM) distributed across edge server (ES) and user equipment (UE), cost-efficient artificial intelligence generated content (AIGC) is achieved by transmitting low-dimensional features between ES and UE. To reduce overheads of both distributed computations and transmissions, a dynamic diffusion and feature merging scheme is conceived. By jointly optimizing the denoising steps and feature merging ratio, the image generation quality is maximized subject to latency and energy consumption constraints. To address this problem and tailor LDM sub-models, a low-complexity MEG acceleration protocol is developed. Particularly, a backbone meta-architecture is trained via offline distillation. Then, dynamic diffusion and feature merging are determined in online channel environment, which can be viewed as a constrained Markov Decision Process (MDP). A constrained variational policy optimization (CVPO) based MEG algorithm is further proposed for constraint-guaranteed learning, namely MEG-CVPO. Numerical results verify that: 1) The proposed framework can generate 1024$\times$1024 high-quality images over noisy channels while reducing over $40\%$ latency compared to conventional generation schemes. 2) The developed MEG-CVPO effectively mitigates constraint violations, thus flexibly controlling the trade-off between image distortion and generation costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07245v2-abstract-full').style.display = 'none'; document.getElementById('2407.07245v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18100">arXiv:2406.18100</a> <span> [<a href="https://arxiv.org/pdf/2406.18100">pdf</a>, <a href="https://arxiv.org/format/2406.18100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Natural Language but Omitted? On the Ineffectiveness of Large Language Models' privacy policy from End-users' Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuning Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haobin Xing</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xin Yi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hewu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18100v1-abstract-short" style="display: inline;"> LLMs driven products were increasingly prevalent in our daily lives, With a natural language based interaction style, people may potentially leak their personal private information. Thus, privacy policy and user agreement played an important role in regulating and alerting people. However, there lacked the work examining the reading of LLM's privacy policy. Thus, we conducted the first user study… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18100v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18100v1-abstract-full" style="display: none;"> LLMs driven products were increasingly prevalent in our daily lives, With a natural language based interaction style, people may potentially leak their personal private information. Thus, privacy policy and user agreement played an important role in regulating and alerting people. However, there lacked the work examining the reading of LLM's privacy policy. Thus, we conducted the first user study to let participants read the privacy policy and user agreement with two different styles (a cursory and detailed style). We found users lack important information upon cursory reading and even detailed reading. Besides, their privacy concerns was not solved even upon detailed reading. We provided four design implications based on the findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18100v1-abstract-full').style.display = 'none'; document.getElementById('2406.18100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11569">arXiv:2406.11569</a> <span> [<a href="https://arxiv.org/pdf/2406.11569">pdf</a>, <a href="https://arxiv.org/format/2406.11569">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Pre-Training and Personalized Fine-Tuning via Over-the-Air Federated Meta-Learning: Convergence-Generalization Trade-Offs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11569v3-abstract-short" style="display: inline;"> For modern artificial intelligence (AI) applications such as large language models (LLMs), the training paradigm has recently shifted to pre-training followed by fine-tuning. Furthermore, owing to dwindling open repositories of data and thanks to efforts to democratize access to AI models, pre-training is expected to increasingly migrate from the current centralized deployments to federated learni… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11569v3-abstract-full').style.display = 'inline'; document.getElementById('2406.11569v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11569v3-abstract-full" style="display: none;"> For modern artificial intelligence (AI) applications such as large language models (LLMs), the training paradigm has recently shifted to pre-training followed by fine-tuning. Furthermore, owing to dwindling open repositories of data and thanks to efforts to democratize access to AI models, pre-training is expected to increasingly migrate from the current centralized deployments to federated learning (FL) implementations. Meta-learning provides a general framework in which pre-training and fine-tuning can be formalized. Meta-learning-based personalized FL (meta-pFL) moves beyond basic personalization by targeting generalization to new agents and tasks. This paper studies the generalization performance of meta-pFL for a wireless setting in which the agents participating in the pre-training phase, i.e., meta-learning, are connected via a shared wireless channel to the server. Adopting over-the-air computing, we study the trade-off between generalization to new agents and tasks, on the one hand, and convergence, on the other hand. The trade-off arises from the fact that channel impairments may enhance generalization, while degrading convergence. Extensive numerical results validate the theory. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11569v3-abstract-full').style.display = 'none'; document.getElementById('2406.11569v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, 7 figures, submitted for possible journal publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08754">arXiv:2406.08754</a> <span> [<a href="https://arxiv.org/pdf/2406.08754">pdf</a>, <a href="https://arxiv.org/format/2406.08754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> StructuralSleight: Automated Jailbreak Attacks on Large Language Models Utilizing Uncommon Text-Organization Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+B">Bangxin Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hengrui Xing</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+C">Cong Tian</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chao Huang</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+J">Jin Qian</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+H">Huangqing Xiao</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Linfeng Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08754v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) are widely used in natural language processing but face the risk of jailbreak attacks that maliciously induce them to generate harmful content. Existing jailbreak attacks, including character-level and context-level attacks, mainly focus on the prompt of plain text without specifically exploring the significant influence of its structure. In this paper, we focus on stu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08754v3-abstract-full').style.display = 'inline'; document.getElementById('2406.08754v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08754v3-abstract-full" style="display: none;"> Large Language Models (LLMs) are widely used in natural language processing but face the risk of jailbreak attacks that maliciously induce them to generate harmful content. Existing jailbreak attacks, including character-level and context-level attacks, mainly focus on the prompt of plain text without specifically exploring the significant influence of its structure. In this paper, we focus on studying how the prompt structure contributes to the jailbreak attack. We introduce a novel structure-level attack method based on long-tailed structures, which we refer to as Uncommon Text-Organization Structures (UTOS). We extensively study 12 UTOS templates and 6 obfuscation methods to build an effective automated jailbreak tool named StructuralSleight that contains three escalating attack strategies: Structural Attack, Structural and Character/Context Obfuscation Attack, and Fully Obfuscated Structural Attack. Extensive experiments on existing LLMs show that StructuralSleight significantly outperforms the baseline methods. In particular, the attack success rate reaches 94.62\% on GPT-4o, which has not been addressed by state-of-the-art techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08754v3-abstract-full').style.display = 'none'; document.getElementById('2406.08754v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02266">arXiv:2406.02266</a> <span> [<a href="https://arxiv.org/pdf/2406.02266">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Retrieval-Augmented LMs with a Two-stage Consistency Learning Compressor </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+C">Chuankai Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dongming Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hanwen Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02266v1-abstract-short" style="display: inline;"> Despite the prevalence of retrieval-augmented language models (RALMs), the seamless integration of these models with retrieval mechanisms to enhance performance in document-based tasks remains challenging. While some post-retrieval processing Retrieval-Augmented Generation (RAG) methods have achieved success, most still lack the ability to distinguish pertinent from extraneous information, leading… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02266v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02266v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02266v1-abstract-full" style="display: none;"> Despite the prevalence of retrieval-augmented language models (RALMs), the seamless integration of these models with retrieval mechanisms to enhance performance in document-based tasks remains challenging. While some post-retrieval processing Retrieval-Augmented Generation (RAG) methods have achieved success, most still lack the ability to distinguish pertinent from extraneous information, leading to potential inconsistencies and reduced precision in the generated output, which subsequently affects the truthfulness of the language model's responses. To address these limitations, this work proposes a novel two-stage consistency learning approach for retrieved information compression in retrieval-augmented language models to enhance performance. By incorporating consistency learning, the aim is to generate summaries that maintain coherence and alignment with the intended semantic representations of a teacher model while improving faithfulness to the original retrieved documents. The proposed method is empirically validated across multiple datasets, demonstrating notable enhancements in precision and efficiency for question-answering tasks. It outperforms existing baselines and showcases the synergistic effects of combining contrastive and consistency learning paradigms within the retrieval-augmented generation framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02266v1-abstract-full').style.display = 'none'; document.getElementById('2406.02266v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14709">arXiv:2405.14709</a> <span> [<a href="https://arxiv.org/pdf/2405.14709">pdf</a>, <a href="https://arxiv.org/format/2405.14709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> OpFlowTalker: Realistic and Natural Talking Face Generation via Optical Flow Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shuheng Ge</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haoyu Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiangqian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14709v2-abstract-short" style="display: inline;"> Creating realistic, natural, and lip-readable talking face videos remains a formidable challenge. Previous research primarily concentrated on generating and aligning single-frame images while overlooking the smoothness of frame-to-frame transitions and temporal dependencies. This often compromised visual quality and effects in practical settings, particularly when handling complex facial data and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14709v2-abstract-full').style.display = 'inline'; document.getElementById('2405.14709v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14709v2-abstract-full" style="display: none;"> Creating realistic, natural, and lip-readable talking face videos remains a formidable challenge. Previous research primarily concentrated on generating and aligning single-frame images while overlooking the smoothness of frame-to-frame transitions and temporal dependencies. This often compromised visual quality and effects in practical settings, particularly when handling complex facial data and audio content, which frequently led to semantically incongruent visual illusions. Specifically, synthesized videos commonly featured disorganized lip movements, making them difficult to understand and recognize. To overcome these limitations, this paper introduces the application of optical flow to guide facial image generation, enhancing inter-frame continuity and semantic consistency. We propose "OpFlowTalker", a novel approach that utilizes predicted optical flow changes from audio inputs rather than direct image predictions. This method smooths image transitions and aligns changes with semantic content. Moreover, it employs a sequence fusion technique to replace the independent generation of single frames, thus preserving contextual information and maintaining temporal coherence. We also developed an optical flow synchronization module that regulates both full-face and lip movements, optimizing visual synthesis by balancing regional dynamics. Furthermore, we introduce a Visual Text Consistency Score (VTCS) that accurately measures lip-readability in synthesized videos. Extensive empirical evidence validates the effectiveness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14709v2-abstract-full').style.display = 'none'; document.getElementById('2405.14709v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00736">arXiv:2405.00736</a> <span> [<a href="https://arxiv.org/pdf/2405.00736">pdf</a>, <a href="https://arxiv.org/format/2405.00736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Joint Signal Detection and Automatic Modulation Classification via Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+S">Shuo Chang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixun Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00736v1-abstract-short" style="display: inline;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00736v1-abstract-full" style="display: none;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different carrier frequencies. We first generate a coexisting RADIOML dataset (CRML23) to facilitate the joint design. Different from the publicly available AMC dataset ignoring the signal detection step and containing only one signal, our synthetic dataset covers the more realistic multiple-signal coexisting scenario. Then, we present a joint framework for detection and classification (JDM) for such a multiple-signal coexisting environment, which consists of two modules for signal detection and AMC, respectively. In particular, these two modules are interconnected using a designated data structure called "proposal". Finally, we conduct extensive simulations over the newly developed dataset, which demonstrate the effectiveness of our designs. Our code and dataset are now available as open-source (https://github.com/Singingkettle/ChangShuoRadioData). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'none'; document.getElementById('2405.00736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16891">arXiv:2404.16891</a> <span> [<a href="https://arxiv.org/pdf/2404.16891">pdf</a>, <a href="https://arxiv.org/format/2404.16891">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Attacks on Third-Party APIs of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wanru Zhao</a>, <a href="/search/cs?searchtype=author&query=Khazanchi%2C+V">Vidit Khazanchi</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haodi Xing</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qiongkai Xu</a>, <a href="/search/cs?searchtype=author&query=Lane%2C+N+D">Nicholas Donald Lane</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16891v1-abstract-short" style="display: inline;"> Large language model (LLM) services have recently begun offering a plugin ecosystem to interact with third-party API services. This innovation enhances the capabilities of LLMs, but it also introduces risks, as these plugins developed by various third parties cannot be easily trusted. This paper proposes a new attacking framework to examine security and safety vulnerabilities within LLM platforms… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16891v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16891v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16891v1-abstract-full" style="display: none;"> Large language model (LLM) services have recently begun offering a plugin ecosystem to interact with third-party API services. This innovation enhances the capabilities of LLMs, but it also introduces risks, as these plugins developed by various third parties cannot be easily trusted. This paper proposes a new attacking framework to examine security and safety vulnerabilities within LLM platforms that incorporate third-party services. Applying our framework specifically to widely used LLMs, we identify real-world malicious attacks across various domains on third-party APIs that can imperceptibly modify LLM outputs. The paper discusses the unique challenges posed by third-party API integration and offers strategic possibilities to improve the security and safety of LLM ecosystems moving forward. Our code is released at https://github.com/vk0812/Third-Party-Attacks-on-LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16891v1-abstract-full').style.display = 'none'; document.getElementById('2404.16891v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2024 Workshop on Secure and Trustworthy Large Language Models</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.14656">arXiv:2401.14656</a> <span> [<a href="https://arxiv.org/pdf/2401.14656">pdf</a>, <a href="https://arxiv.org/format/2401.14656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scientific Large Language Models: A Survey on Biological & Chemical Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Keyang Ding</a>, <a href="/search/cs?searchtype=author&query=Lyv%2C+T">Tianwen Lyv</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinda Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Q">Qingyu Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jing Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhao Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaotong Li</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+Z">Zhuoyi Xiang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+K">Kehua Feng</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+X">Xiang Zhuang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zeyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+M">Ming Qin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mengyao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jinlu Zhang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+J">Jiyu Cui</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tao Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+P">Pengju Yan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Renjun Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyang Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaolin Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xiaohui Fan</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huabin Xing</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huajun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.14656v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have emerged as a transformative power in enhancing natural language comprehension, representing a significant stride toward artificial general intelligence. The application of LLMs extends beyond conventional linguistic boundaries, encompassing specialized linguistic systems developed within various scientific disciplines. This growing interest has led to the advent o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14656v2-abstract-full').style.display = 'inline'; document.getElementById('2401.14656v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.14656v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have emerged as a transformative power in enhancing natural language comprehension, representing a significant stride toward artificial general intelligence. The application of LLMs extends beyond conventional linguistic boundaries, encompassing specialized linguistic systems developed within various scientific disciplines. This growing interest has led to the advent of scientific LLMs, a novel subclass specifically engineered for facilitating scientific discovery. As a burgeoning area in the community of AI for Science, scientific LLMs warrant comprehensive exploration. However, a systematic and up-to-date survey introducing them is currently lacking. In this paper, we endeavor to methodically delineate the concept of "scientific language", whilst providing a thorough review of the latest advancements in scientific LLMs. Given the expansive realm of scientific disciplines, our analysis adopts a focused lens, concentrating on the biological and chemical domains. This includes an in-depth examination of LLMs for textual knowledge, small molecules, macromolecular proteins, genomic sequences, and their combinations, analyzing them in terms of model architectures, capabilities, datasets, and evaluation. Finally, we critically examine the prevailing challenges and point out promising research directions along with the advances of LLMs. By offering a comprehensive overview of technical developments in this field, this survey aspires to be an invaluable resource for researchers navigating the intricate landscape of scientific LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14656v2-abstract-full').style.display = 'none'; document.getElementById('2401.14656v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01522">arXiv:2401.01522</a> <span> [<a href="https://arxiv.org/pdf/2401.01522">pdf</a>, <a href="https://arxiv.org/format/2401.01522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LORE++: Logical Location Regression Network for Table Structure Recognition with Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Long%2C+R">Rujiao Long</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhibo Yang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+C">Cong Yao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01522v1-abstract-short" style="display: inline;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes or learning to directly generate the corresponding markup sequences from the table images. However, existing approaches either count on additional heuristic rules to recover the table structures, or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01522v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01522v1-abstract-full" style="display: none;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes or learning to directly generate the corresponding markup sequences from the table images. However, existing approaches either count on additional heuristic rules to recover the table structures, or face challenges in capturing long-range dependencies within tables, resulting in increased complexity. In this paper, we propose an alternative paradigm. We model TSR as a logical location regression problem and propose a new TSR framework called LORE, standing for LOgical location REgression network, which for the first time regresses logical location as well as spatial location of table cells in a unified network. Our proposed LORE is conceptually simpler, easier to train, and more accurate than other paradigms of TSR. Moreover, inspired by the persuasive success of pre-trained models on a number of computer vision and natural language processing tasks, we propose two pre-training tasks to enrich the spatial and logical representations at the feature level of LORE, resulting in an upgraded version called LORE++. The incorporation of pre-training in LORE++ has proven to enjoy significant advantages, leading to a substantial enhancement in terms of accuracy, generalization, and few-shot capability compared to its predecessor. Experiments on standard benchmarks against methods of previous paradigms demonstrate the superiority of LORE++, which highlights the potential and promising prospect of the logical location regression paradigm for TSR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01522v1-abstract-full').style.display = 'none'; document.getElementById('2401.01522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2303.03730</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16606">arXiv:2310.16606</a> <span> [<a href="https://arxiv.org/pdf/2310.16606">pdf</a>, <a href="https://arxiv.org/ps/2310.16606">ps</a>, <a href="https://arxiv.org/format/2310.16606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AirFL-Mem: Improving Communication-Learning Trade-Off by Long-Term Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16606v2-abstract-short" style="display: inline;"> Addressing the communication bottleneck inherent in federated learning (FL), over-the-air FL (AirFL) has emerged as a promising solution, which is, however, hampered by deep fading conditions. In this paper, we propose AirFL-Mem, a novel scheme designed to mitigate the impact of deep fading by implementing a \emph{long-term} memory mechanism. Convergence bounds are provided that account for long-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16606v2-abstract-full').style.display = 'inline'; document.getElementById('2310.16606v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16606v2-abstract-full" style="display: none;"> Addressing the communication bottleneck inherent in federated learning (FL), over-the-air FL (AirFL) has emerged as a promising solution, which is, however, hampered by deep fading conditions. In this paper, we propose AirFL-Mem, a novel scheme designed to mitigate the impact of deep fading by implementing a \emph{long-term} memory mechanism. Convergence bounds are provided that account for long-term memory, as well as for existing AirFL variants with short-term memory, for general non-convex objectives. The theory demonstrates that AirFL-Mem exhibits the same convergence rate of federated averaging (FedAvg) with ideal communication, while the performance of existing schemes is generally limited by error floors. The theoretical results are also leveraged to propose a novel convex optimization strategy for the truncation threshold used for power control in the presence of Rayleigh fading channels. Experimental results validate the analysis, confirming the advantages of a long-term memory mechanism for the mitigation of deep fading. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16606v2-abstract-full').style.display = 'none'; document.getElementById('2310.16606v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures, submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09533">arXiv:2310.09533</a> <span> [<a href="https://arxiv.org/pdf/2310.09533">pdf</a>, <a href="https://arxiv.org/format/2310.09533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards End-to-End Unsupervised Saliency Detection with Self-Supervised Top-Down Context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yicheng Song</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyong Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haozhe Xing</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yiting Cheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09533v1-abstract-short" style="display: inline;"> Unsupervised salient object detection aims to detect salient objects without using supervision signals eliminating the tedious task of manually labeling salient objects. To improve training efficiency, end-to-end methods for USOD have been proposed as a promising alternative. However, current solutions rely heavily on noisy handcraft labels and fail to mine rich semantic information from deep feat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09533v1-abstract-full').style.display = 'inline'; document.getElementById('2310.09533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09533v1-abstract-full" style="display: none;"> Unsupervised salient object detection aims to detect salient objects without using supervision signals eliminating the tedious task of manually labeling salient objects. To improve training efficiency, end-to-end methods for USOD have been proposed as a promising alternative. However, current solutions rely heavily on noisy handcraft labels and fail to mine rich semantic information from deep features. In this paper, we propose a self-supervised end-to-end salient object detection framework via top-down context. Specifically, motivated by contrastive learning, we exploit the self-localization from the deepest feature to construct the location maps which are then leveraged to learn the most instructive segmentation guidance. Further considering the lack of detailed information in deepest features, we exploit the detail-boosting refiner module to enrich the location labels with details. Moreover, we observe that due to lack of supervision, current unsupervised saliency models tend to detect non-salient objects that are salient in some other samples of corresponding scenarios. To address this widespread issue, we design a novel Unsupervised Non-Salient Suppression (UNSS) method developing the ability to ignore non-salient objects. Extensive experiments on benchmark datasets demonstrate that our method achieves leading performance among the recent end-to-end methods and most of the multi-stage solutions. The code is available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09533v1-abstract-full').style.display = 'none'; document.getElementById('2310.09533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ACM MM 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05357">arXiv:2307.05357</a> <span> [<a href="https://arxiv.org/pdf/2307.05357">pdf</a>, <a href="https://arxiv.org/format/2307.05357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Over-the-Air Computation in OFDM Systems with Imperfect Channel State Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yilong Chen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+L">Lexi Xu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05357v1-abstract-short" style="display: inline;"> This paper studies the over-the-air computation (AirComp) in an orthogonal frequency division multiplexing (OFDM) system with imperfect channel state information (CSI), in which multiple single-antenna wireless devices (WDs) simultaneously send uncoded signals to a multi-antenna access point (AP) for distributed functional computation over multiple subcarriers. In particular, we consider two scena… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05357v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05357v1-abstract-full" style="display: none;"> This paper studies the over-the-air computation (AirComp) in an orthogonal frequency division multiplexing (OFDM) system with imperfect channel state information (CSI), in which multiple single-antenna wireless devices (WDs) simultaneously send uncoded signals to a multi-antenna access point (AP) for distributed functional computation over multiple subcarriers. In particular, we consider two scenarios with best-effort and error-constrained computation tasks, with the objectives of minimizing the average computation mean squared error (MSE) and the computation outage probability over the multiple subcarriers, respectively. Towards this end, we jointly optimize the transmit coefficients at the WDs and the receive beamforming vectors at the AP over subcarriers, subject to the maximum transmit power constraints at individual WDs. First, for the special case with a single receive antenna at the AP, we propose the semi-closed-form globally optimal solutions to the two problems using the Lagrange-duality method. It is shown that at each subcarrier, the WDs' optimized power control policy for average MSE minimization follows a regularized channel inversion structure, while that for computation outage probability minimization follows an on-off regularized channel inversion, with the regularization dependent on the transmit power budget and channel estimation error. Next, for the general case with multiple receive antennas at the AP, we present efficient algorithms based on alternating optimization and convex optimization to find converged solutions to both problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05357v1-abstract-full').style.display = 'none'; document.getElementById('2307.05357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14109">arXiv:2306.14109</a> <span> [<a href="https://arxiv.org/pdf/2306.14109">pdf</a>, <a href="https://arxiv.org/format/2306.14109">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> When SAM Meets Sonar Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lin Wang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xiufen Ye</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Liqiang Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Weijie Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianguo Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huiming Xing</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chao Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14109v1-abstract-short" style="display: inline;"> Segment Anything Model (SAM) has revolutionized the way of segmentation. However, SAM's performance may decline when applied to tasks involving domains that differ from natural images. Nonetheless, by employing fine-tuning techniques, SAM exhibits promising capabilities in specific domains, such as medicine and planetary science. Notably, there is a lack of research on the application of SAM to so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14109v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14109v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14109v1-abstract-full" style="display: none;"> Segment Anything Model (SAM) has revolutionized the way of segmentation. However, SAM's performance may decline when applied to tasks involving domains that differ from natural images. Nonetheless, by employing fine-tuning techniques, SAM exhibits promising capabilities in specific domains, such as medicine and planetary science. Notably, there is a lack of research on the application of SAM to sonar imaging. In this paper, we aim to address this gap by conducting a comprehensive investigation of SAM's performance on sonar images. Specifically, we evaluate SAM using various settings on sonar images. Additionally, we fine-tune SAM using effective methods both with prompts and for semantic segmentation, thereby expanding its applicability to tasks requiring automated segmentation. Experimental results demonstrate a significant improvement in the performance of the fine-tuned SAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14109v1-abstract-full').style.display = 'none'; document.getElementById('2306.14109v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06603">arXiv:2306.06603</a> <span> [<a href="https://arxiv.org/pdf/2306.06603">pdf</a>, <a href="https://arxiv.org/ps/2306.06603">ps</a>, <a href="https://arxiv.org/format/2306.06603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Integrated Sensing, Computation and Communication for Wireless Edge AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dongzhu Liu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaibin Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kaishun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06603v1-abstract-short" style="display: inline;"> With the advent of emerging IoT applications such as autonomous driving, digital-twin and metaverse etc. featuring massive data sensing, analyzing and inference as well critical latency in beyond 5G (B5G) networks, edge artificial intelligence (AI) has been proposed to provide high-performance computation of a conventional cloud down to the network edge. Recently, convergence of wireless sensing,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06603v1-abstract-full').style.display = 'inline'; document.getElementById('2306.06603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06603v1-abstract-full" style="display: none;"> With the advent of emerging IoT applications such as autonomous driving, digital-twin and metaverse etc. featuring massive data sensing, analyzing and inference as well critical latency in beyond 5G (B5G) networks, edge artificial intelligence (AI) has been proposed to provide high-performance computation of a conventional cloud down to the network edge. Recently, convergence of wireless sensing, computation and communication (SC${}^2$) for specific edge AI tasks, has aroused paradigm shift by enabling (partial) sharing of the radio-frequency (RF) transceivers and information processing pipelines among these three fundamental functionalities of IoT. However, most existing design frameworks separate these designs incurring unnecessary signaling overhead and waste of energy, and it is therefore of paramount importance to advance fully integrated sensing, computation and communication (ISCC) to achieve ultra-reliable and low-latency edge intelligence acquisition. In this article, we provide an overview of principles of enabling ISCC technologies followed by two concrete use cases of edge AI tasks demonstrating the advantage of task-oriented ISCC, and pointed out some practical challenges in edge AI design with advanced ISCC solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06603v1-abstract-full').style.display = 'none'; document.getElementById('2306.06603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 6 figures, submitted for possible journal publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11135">arXiv:2305.11135</a> <span> [<a href="https://arxiv.org/pdf/2305.11135">pdf</a>, <a href="https://arxiv.org/format/2305.11135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Convergence Analysis of Over-the-Air FL with Compression and Power Control via Clipping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11135v1-abstract-short" style="display: inline;"> One of the key challenges towards the deployment of over-the-air federated learning (AirFL) is the design of mechanisms that can comply with the power and bandwidth constraints of the shared channel, while causing minimum deterioration to the learning performance as compared to baseline noiseless implementations. For additive white Gaussian noise (AWGN) channels with instantaneous per-device power… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11135v1-abstract-full').style.display = 'inline'; document.getElementById('2305.11135v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11135v1-abstract-full" style="display: none;"> One of the key challenges towards the deployment of over-the-air federated learning (AirFL) is the design of mechanisms that can comply with the power and bandwidth constraints of the shared channel, while causing minimum deterioration to the learning performance as compared to baseline noiseless implementations. For additive white Gaussian noise (AWGN) channels with instantaneous per-device power constraints, prior work has demonstrated the optimality of a power control mechanism based on norm clipping. This was done through the minimization of an upper bound on the optimality gap for smooth learning objectives satisfying the Polyak-艁ojasiewicz (PL) condition. In this paper, we make two contributions to the development of AirFL based on norm clipping, which we refer to as AirFL-Clip. First, we provide a convergence bound for AirFLClip that applies to general smooth and non-convex learning objectives. Unlike existing results, the derived bound is free from run-specific parameters, thus supporting an offline evaluation. Second, we extend AirFL-Clip to include Top-k sparsification and linear compression. For this generalized protocol, referred to as AirFL-Clip-Comp, we derive a convergence bound for general smooth and non-convex learning objectives. We argue, and demonstrate via experiments, that the only time-varying quantities present in the bound can be efficiently estimated offline by leveraging the well-studied properties of sparse recovery algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11135v1-abstract-full').style.display = 'none'; document.getElementById('2305.11135v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.08468">arXiv:2305.08468</a> <span> [<a href="https://arxiv.org/pdf/2305.08468">pdf</a>, <a href="https://arxiv.org/format/2305.08468">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3589785">10.1145/3589785 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PolarDB-IMCI: A Cloud-Native HTAP Database System at Alibaba </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianying Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tongliang Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Haoze Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xinjun Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wenchao Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Feifei Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+B">Baoyue Yan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qianqian Wu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yukun Liang</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+C">Chengjun Ying</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yujie Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Baokai Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+C">Chang Cai</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+Y">Yubin Ruan</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+X">Xiaoyi Weng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shibin Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Liang Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chengzhong Yang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xin Cai</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hongyan Xing</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+N">Nanlong Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaofei Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">Dapeng Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianling Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.08468v1-abstract-short" style="display: inline;"> Cloud-native databases have become the de-facto choice for mission-critical applications on the cloud due to the need for high availability, resource elasticity, and cost efficiency. Meanwhile, driven by the increasing connectivity between data generation and analysis, users prefer a single database to efficiently process both OLTP and OLAP workloads, which enhances data freshness and reduces the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08468v1-abstract-full').style.display = 'inline'; document.getElementById('2305.08468v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.08468v1-abstract-full" style="display: none;"> Cloud-native databases have become the de-facto choice for mission-critical applications on the cloud due to the need for high availability, resource elasticity, and cost efficiency. Meanwhile, driven by the increasing connectivity between data generation and analysis, users prefer a single database to efficiently process both OLTP and OLAP workloads, which enhances data freshness and reduces the complexity of data synchronization and the overall business cost. In this paper, we summarize five crucial design goals for a cloud-native HTAP database based on our experience and customers' feedback, i.e., transparency, competitive OLAP performance, minimal perturbation on OLTP workloads, high data freshness, and excellent resource elasticity. As our solution to realize these goals, we present PolarDB-IMCI, a cloud-native HTAP database system designed and deployed at Alibaba Cloud. Our evaluation results show that PolarDB-IMCI is able to handle HTAP efficiently on both experimental and production workloads; notably, it speeds up analytical queries up to $\times149$ on TPC-H (100 $GB$). PolarDB-IMCI introduces low visibility delay and little performance perturbation on OLTP workloads (< 5%), and resource elasticity can be achieved by scaling out in tens of seconds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08468v1-abstract-full').style.display = 'none'; document.getElementById('2305.08468v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 16 figures, to be published in ACM SIGMOD 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.03730">arXiv:2303.03730</a> <span> [<a href="https://arxiv.org/pdf/2303.03730">pdf</a>, <a href="https://arxiv.org/format/2303.03730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LORE: Logical Location Regression Network for Table Structure Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+F">Feiyu Gao</a>, <a href="/search/cs?searchtype=author&query=Long%2C+R">Rujiao Long</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+J">Jiajun Bu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Liangcheng Li</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+C">Cong Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.03730v1-abstract-short" style="display: inline;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes, or learning to generate the corresponding markup sequences from the table images. However, they either count on additional heuristic rules to recover the table structures, or require a huge amount… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03730v1-abstract-full').style.display = 'inline'; document.getElementById('2303.03730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.03730v1-abstract-full" style="display: none;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes, or learning to generate the corresponding markup sequences from the table images. However, they either count on additional heuristic rules to recover the table structures, or require a huge amount of training data and time-consuming sequential decoders. In this paper, we propose an alternative paradigm. We model TSR as a logical location regression problem and propose a new TSR framework called LORE, standing for LOgical location REgression network, which for the first time combines logical location regression together with spatial location regression of table cells. Our proposed LORE is conceptually simpler, easier to train and more accurate than previous TSR models of other paradigms. Experiments on standard benchmarks demonstrate that LORE consistently outperforms prior arts. Code is available at https:// github.com/AlibabaResearch/AdvancedLiterateMachinery/tree/main/DocumentUnderstanding/LORE-TSR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03730v1-abstract-full').style.display = 'none'; document.getElementById('2303.03730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.13546">arXiv:2301.13546</a> <span> [<a href="https://arxiv.org/pdf/2301.13546">pdf</a>, <a href="https://arxiv.org/ps/2301.13546">ps</a>, <a href="https://arxiv.org/format/2301.13546">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Joint Task Offloading and Cache Placement for Energy-Efficient Mobile Edge Computing Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jingxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+V+K+N">Vincent K. N. Lau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.13546v1-abstract-short" style="display: inline;"> This letter investigates a cache-enabled multiuser mobile edge computing (MEC) system with dynamic task arrivals, taking into account the impact of proactive cache placement on the system's overall energy consumption. We consider that an access point (AP) schedules a wireless device (WD) to offload computational tasks while executing the tasks of a finite library in the \emph{task caching} phase,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13546v1-abstract-full').style.display = 'inline'; document.getElementById('2301.13546v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.13546v1-abstract-full" style="display: none;"> This letter investigates a cache-enabled multiuser mobile edge computing (MEC) system with dynamic task arrivals, taking into account the impact of proactive cache placement on the system's overall energy consumption. We consider that an access point (AP) schedules a wireless device (WD) to offload computational tasks while executing the tasks of a finite library in the \emph{task caching} phase, such that the nearby WDs with the same task request arriving later can directly download the task results in the \emph{task arrival and execution} phase. We aim for minimizing the system's weighted-sum energy over a finite-time horizon, by jointly optimizing the task caching decision and the MEC execution of the AP, and local computing as well as task offloading of the WDs at each time slot, subject to caching capacity, task causality, and completion deadline constraints. The formulated design problem is a mixed-integer nonlinear program. Under the assumption of fully predicable task arrivals, we first propose a branch-and-bound (BnB) based method to obtain the optimal offline solution. Next, we propose two low-complexity schemes based on convex relaxation and task-popularity, respectively. Finally, numerical results show the benefit of the proposed schemes over existing benchmark schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13546v1-abstract-full').style.display = 'none'; document.getElementById('2301.13546v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, accepted for publication in WCL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02989">arXiv:2208.02989</a> <span> [<a href="https://arxiv.org/pdf/2208.02989">pdf</a>, <a href="https://arxiv.org/format/2208.02989">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> </div> </div> <p class="title is-5 mathjax"> Covariant-Contravariant Refinement Modal $渭$-calculus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huili Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02989v1-abstract-short" style="display: inline;"> The notion of covariant-contravariant refinement (CC-refinement, for short) is a generalization of the notions of bisimulation, simulation and refinement. This paper introduces CC-refinement modal $渭$-calculus (CCRML$^渭$) obtained from the modal $渭$-calculus system K$^渭$ by adding CC-refinement quantifiers, establishes an axiom system for CCRML$^渭$ and explores the important properties: soundness,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02989v1-abstract-full').style.display = 'inline'; document.getElementById('2208.02989v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02989v1-abstract-full" style="display: none;"> The notion of covariant-contravariant refinement (CC-refinement, for short) is a generalization of the notions of bisimulation, simulation and refinement. This paper introduces CC-refinement modal $渭$-calculus (CCRML$^渭$) obtained from the modal $渭$-calculus system K$^渭$ by adding CC-refinement quantifiers, establishes an axiom system for CCRML$^渭$ and explores the important properties: soundness, completeness and decidability of this axiom system. The language of CCRML$^渭$ may be considered as a specification language for describing the properties of a system referring to reactive and generative actions. It may be used to formalize some interesting problems in the field of formal methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02989v1-abstract-full').style.display = 'none'; document.getElementById('2208.02989v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07795">arXiv:2207.07795</a> <span> [<a href="https://arxiv.org/pdf/2207.07795">pdf</a>, <a href="https://arxiv.org/format/2207.07795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3503161.3548344">10.1145/3503161.3548344 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RCRN: Real-world Character Image Restoration Network via Skeleton Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+D">Daqian Shi</a>, <a href="/search/cs?searchtype=author&query=Diao%2C+X">Xiaolei Diao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaomin Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07795v2-abstract-short" style="display: inline;"> Constructing high-quality character image datasets is challenging because real-world images are often affected by image degradation. There are limitations when applying current image restoration methods to such real-world character images, since (i) the categories of noise in character images are different from those in general images; (ii) real-world character images usually contain more complex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07795v2-abstract-full').style.display = 'inline'; document.getElementById('2207.07795v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07795v2-abstract-full" style="display: none;"> Constructing high-quality character image datasets is challenging because real-world images are often affected by image degradation. There are limitations when applying current image restoration methods to such real-world character images, since (i) the categories of noise in character images are different from those in general images; (ii) real-world character images usually contain more complex image degradation, e.g., mixed noise at different noise levels. To address these problems, we propose a real-world character restoration network (RCRN) to effectively restore degraded character images, where character skeleton information and scale-ensemble feature extraction are utilized to obtain better restoration performance. The proposed method consists of a skeleton extractor (SENet) and a character image restorer (CiRNet). SENet aims to preserve the structural consistency of the character and normalize complex noise. Then, CiRNet reconstructs clean images from degraded character images and their skeletons. Due to the lack of benchmarks for real-world character image restoration, we constructed a dataset containing 1,606 character images with real-world degradation to evaluate the validity of the proposed method. The experimental results demonstrate that RCRN outperforms state-of-the-art methods quantitatively and qualitatively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07795v2-abstract-full').style.display = 'none'; document.getElementById('2207.07795v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM MM 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07564">arXiv:2207.07564</a> <span> [<a href="https://arxiv.org/pdf/2207.07564">pdf</a>, <a href="https://arxiv.org/format/2207.07564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Attention Mechanism in Time Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07564v1-abstract-short" style="display: inline;"> Attention-based models have been widely used in many areas, such as computer vision and natural language processing. However, relevant applications in time series classification (TSC) have not been explored deeply yet, causing a significant number of TSC algorithms still suffer from general problems of attention mechanism, like quadratic complexity. In this paper, we promote the efficiency and per… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07564v1-abstract-full').style.display = 'inline'; document.getElementById('2207.07564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07564v1-abstract-full" style="display: none;"> Attention-based models have been widely used in many areas, such as computer vision and natural language processing. However, relevant applications in time series classification (TSC) have not been explored deeply yet, causing a significant number of TSC algorithms still suffer from general problems of attention mechanism, like quadratic complexity. In this paper, we promote the efficiency and performance of the attention mechanism by proposing our flexible multi-head linear attention (FMLA), which enhances locality awareness by layer-wise interactions with deformable convolutional blocks and online knowledge distillation. What's more, we propose a simple but effective mask mechanism that helps reduce the noise influence in time series and decrease the redundancy of the proposed FMLA by masking some positions of each given series proportionally. To stabilize this mechanism, samples are forwarded through the model with random mask layers several times and their outputs are aggregated to teach the same model with regular mask layers. We conduct extensive experiments on 85 UCR2018 datasets to compare our algorithm with 11 well-known ones and the results show that our algorithm has comparable performance in terms of top-1 accuracy. We also compare our model with three Transformer-based models with respect to the floating-point operations per second and number of parameters and find that our algorithm achieves significantly better efficiency with lower complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07564v1-abstract-full').style.display = 'none'; document.getElementById('2207.07564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07269">arXiv:2207.07269</a> <span> [<a href="https://arxiv.org/pdf/2207.07269">pdf</a>, <a href="https://arxiv.org/format/2207.07269">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Weakly Supervised Video Salient Object Detection via Point Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyong Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haozhe Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qianyu Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07269v1-abstract-short" style="display: inline;"> Video salient object detection models trained on pixel-wise dense annotation have achieved excellent performance, yet obtaining pixel-by-pixel annotated datasets is laborious. Several works attempt to use scribble annotations to mitigate this problem, but point supervision as a more labor-saving annotation method (even the most labor-saving method among manual annotation methods for dense predicti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07269v1-abstract-full').style.display = 'inline'; document.getElementById('2207.07269v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07269v1-abstract-full" style="display: none;"> Video salient object detection models trained on pixel-wise dense annotation have achieved excellent performance, yet obtaining pixel-by-pixel annotated datasets is laborious. Several works attempt to use scribble annotations to mitigate this problem, but point supervision as a more labor-saving annotation method (even the most labor-saving method among manual annotation methods for dense prediction), has not been explored. In this paper, we propose a strong baseline model based on point supervision. To infer saliency maps with temporal information, we mine inter-frame complementary information from short-term and long-term perspectives, respectively. Specifically, we propose a hybrid token attention module, which mixes optical flow and image information from orthogonal directions, adaptively highlighting critical optical flow information (channel dimension) and critical token information (spatial dimension). To exploit long-term cues, we develop the Long-term Cross-Frame Attention module (LCFA), which assists the current frame in inferring salient objects based on multi-frame tokens. Furthermore, we label two point-supervised datasets, P-DAVIS and P-DAVSOD, by relabeling the DAVIS and the DAVSOD dataset. Experiments on the six benchmark datasets illustrate our method outperforms the previous state-of-the-art weakly supervised methods and even is comparable with some fully supervised approaches. Source code and datasets are available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07269v1-abstract-full').style.display = 'none'; document.getElementById('2207.07269v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ACM MM 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.06351">arXiv:2207.06351</a> <span> [<a href="https://arxiv.org/pdf/2207.06351">pdf</a>, <a href="https://arxiv.org/format/2207.06351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.patcog.2022.108806">10.1016/j.patcog.2022.108806 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Joint Prediction of Monocular Depth and Structure using Planar and Parallax Geometry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yifan Cao</a>, <a href="/search/cs?searchtype=author&query=Biber%2C+M">Maximilian Biber</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mingchuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.06351v1-abstract-short" style="display: inline;"> Supervised learning depth estimation methods can achieve good performance when trained on high-quality ground-truth, like LiDAR data. However, LiDAR can only generate sparse 3D maps which causes losing information. Obtaining high-quality ground-truth depth data per pixel is difficult to acquire. In order to overcome this limitation, we propose a novel approach combining structure information from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06351v1-abstract-full').style.display = 'inline'; document.getElementById('2207.06351v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.06351v1-abstract-full" style="display: none;"> Supervised learning depth estimation methods can achieve good performance when trained on high-quality ground-truth, like LiDAR data. However, LiDAR can only generate sparse 3D maps which causes losing information. Obtaining high-quality ground-truth depth data per pixel is difficult to acquire. In order to overcome this limitation, we propose a novel approach combining structure information from a promising Plane and Parallax geometry pipeline with depth information into a U-Net supervised learning network, which results in quantitative and qualitative improvement compared to existing popular learning-based methods. In particular, the model is evaluated on two large-scale and challenging datasets: KITTI Vision Benchmark and Cityscapes dataset and achieve the best performance in terms of relative error. Compared with pure depth supervision models, our model has impressive performance on depth prediction of thin objects and edges, and compared to structure prediction baseline, our model performs more robustly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06351v1-abstract-full').style.display = 'none'; document.getElementById('2207.06351v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Pattern Recognition, May 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.05493">arXiv:2207.05493</a> <span> [<a href="https://arxiv.org/pdf/2207.05493">pdf</a>, <a href="https://arxiv.org/format/2207.05493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Skeletal Human Action Recognition using Hybrid Attention based Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.05493v1-abstract-short" style="display: inline;"> In skeleton-based action recognition, Graph Convolutional Networks model human skeletal joints as vertices and connect them through an adjacency matrix, which can be seen as a local attention mask. However, in most existing Graph Convolutional Networks, the local attention mask is defined based on natural connections of human skeleton joints and ignores the dynamic relations for example between he… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05493v1-abstract-full').style.display = 'inline'; document.getElementById('2207.05493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.05493v1-abstract-full" style="display: none;"> In skeleton-based action recognition, Graph Convolutional Networks model human skeletal joints as vertices and connect them through an adjacency matrix, which can be seen as a local attention mask. However, in most existing Graph Convolutional Networks, the local attention mask is defined based on natural connections of human skeleton joints and ignores the dynamic relations for example between head, hands and feet joints. In addition, the attention mechanism has been proven effective in Natural Language Processing and image description, which is rarely investigated in existing methods. In this work, we proposed a new adaptive spatial attention layer that extends local attention map to global based on relative distance and relative angle information. Moreover, we design a new initial graph adjacency matrix that connects head, hands and feet, which shows visible improvement in terms of action recognition accuracy. The proposed model is evaluated on two large-scale and challenging datasets in the field of human activities in daily life: NTU-RGB+D and Kinetics skeleton. The results demonstrate that our model has strong performance on both dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05493v1-abstract-full').style.display = 'none'; document.getElementById('2207.05493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26th International Conference on Pattern Recognition, 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.14506">arXiv:2206.14506</a> <span> [<a href="https://arxiv.org/pdf/2206.14506">pdf</a>, <a href="https://arxiv.org/format/2206.14506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An extension of process calculus for asynchronous communications between agents with epistemic states </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huili Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.14506v2-abstract-short" style="display: inline;"> It plays a central role in intelligent agent systems to model agent's epistemic state and its change. Asynchrony plays a key role in distributed systems, in which the messages transmitted may not be received instantly by the agents. To characterize asynchronous communications, asynchronous announcement logic (AAL) has been presented, which focuses on the logic laws of the change of epistemic state… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.14506v2-abstract-full').style.display = 'inline'; document.getElementById('2206.14506v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.14506v2-abstract-full" style="display: none;"> It plays a central role in intelligent agent systems to model agent's epistemic state and its change. Asynchrony plays a key role in distributed systems, in which the messages transmitted may not be received instantly by the agents. To characterize asynchronous communications, asynchronous announcement logic (AAL) has been presented, which focuses on the logic laws of the change of epistemic state after receiving information. However AAL does not involve the interactive behaviours between an agent and its environment. Through enriching the well-known pi-calculus by adding the operators for passing basic facts and applying the well-known action model logic to describe agents' epistemic states, this paper presents the e-calculus to model epistemic interactions between agents with epistemic states. The e-calculus can be adopted to characterize synchronous and asynchronous communications between agents. To capture the asynchrony, a buffer pools is constructed to store the basic facts announced and each agent reads these facts from this buffer pool in some order. Based on the transmission of link names, the e-calculus is able to realize reading from this buffer pool in different orders. This paper gives two examples: one is to read in the order in which the announced basic facts are sent (First-in-first-out, FIFO), and the other is in an arbitrary order. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.14506v2-abstract-full').style.display = 'none'; document.getElementById('2206.14506v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages and 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09925">arXiv:2205.09925</a> <span> [<a href="https://arxiv.org/pdf/2205.09925">pdf</a>, <a href="https://arxiv.org/format/2205.09925">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> On Jointly Optimizing Partial Offloading and SFC Mapping: A Cooperative Dual-agent Deep Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+S">Shouxi Luo</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+P">Penglin Dai</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09925v1-abstract-short" style="display: inline;"> Multi-access edge computing (MEC) and network function virtualization (NFV) are promising technologies to support emerging IoT applications, especially those computation-intensive. In NFV-enabled MEC environment, service function chain (SFC), i.e., a set of ordered virtual network functions (VNFs), can be mapped on MEC servers. Mobile devices (MDs) can offload computation-intensive applications, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09925v1-abstract-full').style.display = 'inline'; document.getElementById('2205.09925v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09925v1-abstract-full" style="display: none;"> Multi-access edge computing (MEC) and network function virtualization (NFV) are promising technologies to support emerging IoT applications, especially those computation-intensive. In NFV-enabled MEC environment, service function chain (SFC), i.e., a set of ordered virtual network functions (VNFs), can be mapped on MEC servers. Mobile devices (MDs) can offload computation-intensive applications, which can be represented by SFCs, fully or partially to MEC servers for remote execution. This paper studies the partial offloading and SFC mapping joint optimization (POSMJO) problem in an NFV-enabled MEC system, where an incoming task can be partitioned into two parts, one for local execution and the other for remote execution. The objective is to minimize the average cost in the long term which is a combination of execution delay, MD's energy consumption, and usage charge for edge computing. This problem consists of two closely related decision-making steps, namely task partition and VNF placement, which is highly complex and quite challenging. To address this, we propose a cooperative dual-agent deep reinforcement learning (CDADRL) algorithm, where we design a framework enabling interaction between two agents. Simulation results show that the proposed algorithm outperforms three combinations of deep reinforcement learning algorithms in terms of cumulative and average episodic rewards and it overweighs a number of baseline algorithms with respect to execution delay, energy consumption, and usage charge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09925v1-abstract-full').style.display = 'none'; document.getElementById('2205.09925v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.12028">arXiv:2202.12028</a> <span> [<a href="https://arxiv.org/pdf/2202.12028">pdf</a>, <a href="https://arxiv.org/format/2202.12028">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Evolutionary Multi-Objective Reinforcement Learning Based Trajectory Control and Task Offloading in UAV-Assisted Mobile Edge Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+S">Shouxi Luo</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+P">Penglin Dai</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.12028v1-abstract-short" style="display: inline;"> This paper studies the trajectory control and task offloading (TCTO) problem in an unmanned aerial vehicle (UAV)-assisted mobile edge computing system, where a UAV flies along a planned trajectory to collect computation tasks from smart devices (SDs). We consider a scenario that SDs are not directly connected by the base station (BS) and the UAV has two roles to play: MEC server or wireless relay.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12028v1-abstract-full').style.display = 'inline'; document.getElementById('2202.12028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.12028v1-abstract-full" style="display: none;"> This paper studies the trajectory control and task offloading (TCTO) problem in an unmanned aerial vehicle (UAV)-assisted mobile edge computing system, where a UAV flies along a planned trajectory to collect computation tasks from smart devices (SDs). We consider a scenario that SDs are not directly connected by the base station (BS) and the UAV has two roles to play: MEC server or wireless relay. The UAV makes task offloading decisions online, in which the collected tasks can be executed locally on the UAV or offloaded to the BS for remote processing. The TCTO problem involves multi-objective optimization as its objectives are to minimize the task delay and the UAV's energy consumption, and maximize the number of tasks collected by the UAV, simultaneously. This problem is challenging because the three objectives conflict with each other. The existing reinforcement learning (RL) algorithms, either single-objective RLs or single-policy multi-objective RLs, cannot well address the problem since they cannot output multiple policies for various preferences (i.e. weights) across objectives in a single run. This paper adapts the evolutionary multi-objective RL (EMORL), a multi-policy multi-objective RL, to the TCTO problem. This algorithm can output multiple optimal policies in just one run, each optimizing a certain preference. The simulation results demonstrate that the proposed algorithm can obtain more excellent nondominated policies by striking a balance between the three objectives regarding policy quality, compared with two evolutionary and two multi-policy RL algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12028v1-abstract-full').style.display = 'none'; document.getElementById('2202.12028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.00011">arXiv:2201.00011</a> <span> [<a href="https://arxiv.org/pdf/2201.00011">pdf</a>, <a href="https://arxiv.org/format/2201.00011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Federated Distillation Learning System for Multi-task Time Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+R">Rong Qu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zonghai Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.00011v1-abstract-short" style="display: inline;"> This paper proposes an efficient federated distillation learning system (EFDLS) for multi-task time series classification (TSC). EFDLS consists of a central server and multiple mobile users, where different users may run different TSC tasks. EFDLS has two novel components, namely a feature-based student-teacher (FBST) framework and a distance-based weights matching (DBWM) scheme. Within each user,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00011v1-abstract-full').style.display = 'inline'; document.getElementById('2201.00011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.00011v1-abstract-full" style="display: none;"> This paper proposes an efficient federated distillation learning system (EFDLS) for multi-task time series classification (TSC). EFDLS consists of a central server and multiple mobile users, where different users may run different TSC tasks. EFDLS has two novel components, namely a feature-based student-teacher (FBST) framework and a distance-based weights matching (DBWM) scheme. Within each user, the FBST framework transfers knowledge from its teacher's hidden layers to its student's hidden layers via knowledge distillation, with the teacher and student having identical network structure. For each connected user, its student model's hidden layers' weights are uploaded to the EFDLS server periodically. The DBWM scheme is deployed on the server, with the least square distance used to measure the similarity between the weights of two given models. This scheme finds a partner for each connected user such that the user's and its partner's weights are the closest among all the weights uploaded. The server exchanges and sends back the user's and its partner's weights to these two users which then load the received weights to their teachers' hidden layers. Experimental results show that the proposed EFDLS achieves excellent performance on a set of selected UCR2018 datasets regarding top-1 accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00011v1-abstract-full').style.display = 'none'; document.getElementById('2201.00011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xing%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xing%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xing%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>